From 90dfd76418c8efeab34360911af419f4e7ceb099 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Wed, 27 Sep 2023 16:07:50 +0530
Subject: [PATCH 001/107] Better Posterior Formulation

---
 src/BPINN_ode.jl        |    1 +
 src/advancedHMC_MCMC.jl |  213 ++-
 test/BPINN_Tests.jl     | 4014 ++++++++++++++++++++++++++++++++++++++-
 3 files changed, 4210 insertions(+), 18 deletions(-)

diff --git a/src/BPINN_ode.jl b/src/BPINN_ode.jl
index f79f5208f2..f9a68b8917 100644
--- a/src/BPINN_ode.jl
+++ b/src/BPINN_ode.jl
@@ -284,6 +284,7 @@ function DiffEqBase.__solve(prob::DiffEqBase.ODEProblem,
         push!(ensemblecurves, ensemblecurve)
     end
 
+    # estimated using all samples
     nnparams = length(θinit)
     estimnnparams = [Particles(reduce(hcat, samples)[i, :]) for i in 1:nnparams]
 
diff --git a/src/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl
index 9bd4243cf6..e6b1f24faa 100644
--- a/src/advancedHMC_MCMC.jl
+++ b/src/advancedHMC_MCMC.jl
@@ -80,6 +80,7 @@ end
 
 function LogDensityProblems.logdensity(Tar::LogTargetDensity, θ)
     return physloglikelihood(Tar, θ) + priorweights(Tar, θ) + L2LossData(Tar, θ)
+    #  +  L2loss2(Tar, θ)
 end
 
 LogDensityProblems.dimension(Tar::LogTargetDensity) = Tar.dim
@@ -88,25 +89,221 @@ function LogDensityProblems.capabilities(::LogTargetDensity)
     LogDensityProblems.LogDensityOrder{1}()
 end
 
+# suggested extra loss function
+function L2loss2(Tar::LogTargetDensity, θ)
+    f = Tar.prob.f
+
+    # parameter estimation chosen or not
+    if Tar.extraparams > 0
+        dataset, deri_sol = Tar.dataset
+        # deri_sol = deri_sol'
+        autodiff = Tar.autodiff
+
+        # # Timepoints to enforce Physics
+        # dataset = Array(reduce(hcat, dataset)')
+        # t = dataset[end, :]
+        # û = dataset[1:(end - 1), :]
+
+        # ode_params = Tar.extraparams == 1 ?
+        #              θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
+        #              θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
+
+        # if length(û[:, 1]) == 1
+        #     physsol = [f(û[:, i][1],
+        #         ode_params,
+        #         t[i])
+        #                for i in 1:length(û[1, :])]
+        # else
+        #     physsol = [f(û[:, i],
+        #         ode_params,
+        #         t[i])
+        #                for i in 1:length(û[1, :])]
+        # end
+        # #form of NN output matrix output dim x n
+        # deri_physsol = reduce(hcat, physsol)
+
+        # > for perfect deriv(basically gradient matching in case of an ODEFunction)
+        # in case of PDE or general ODE we would want to reduce residue of f(du,u,p,t)
+        # if length(û[:, 1]) == 1
+        #     deri_sol = [f(û[:, i][1],
+        #         Tar.prob.p,
+        #         t[i])
+        #                 for i in 1:length(û[1, :])]
+        # else
+        #     deri_sol = [f(û[:, i],
+        #         Tar.prob.p,
+        #         t[i])
+        #                 for i in 1:length(û[1, :])]
+        # end
+        # deri_sol = reduce(hcat, deri_sol) 
+        # deri_sol = reduce(hcat, derivatives)
+
+        # Timepoints to enforce Physics 
+        t = dataset[end]
+        u1 = dataset[2]
+        û = dataset[1]
+        # Tar(t, θ[1:(length(θ) - Tar.extraparams)])'
+        #  
+
+        nnsol = NNodederi(Tar, t, θ[1:(length(θ) - Tar.extraparams)], autodiff)
+
+        ode_params = Tar.extraparams == 1 ?
+                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
+                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
+
+        if length(Tar.prob.u0) == 1
+            physsol = [f(û[i],
+                ode_params,
+                t[i])
+                       for i in 1:length(û[:, 1])]
+        else
+            physsol = [f([û[i], u1[i]],
+                ode_params,
+                t[i])
+                       for i in 1:length(û[:, 1])]
+        end
+        #form of NN output matrix output dim x n 
+        deri_physsol = reduce(hcat, physsol)
+
+        # if length(Tar.prob.u0) == 1
+        #     nnsol = [f(û[i],
+        #         Tar.prob.p,
+        #         t[i])
+        #              for i in 1:length(û[:, 1])]
+        # else
+        #     nnsol = [f([û[i], u1[i]],
+        #         Tar.prob.p,
+        #         t[i])
+        #              for i in 1:length(û[:, 1])]
+        # end
+        # form of NN output matrix output dim x n
+        # nnsol = reduce(hcat, nnsol)
+
+        # > Instead of dataset gradients trying NN derivatives with dataset collocation 
+        # # convert to matrix as nnsol  
+
+        physlogprob = 0
+        for i in 1:length(Tar.prob.u0)
+            # can add phystd[i] for u[i] 
+            physlogprob += logpdf(MvNormal(deri_physsol[i, :],
+                    LinearAlgebra.Diagonal(map(abs2,
+                        (Tar.l2std[i] * 4.0) .*
+                        ones(length(nnsol[i, :]))))),
+                nnsol[i, :])
+        end
+        return physlogprob
+    else
+        return 0
+    end
+end
+
+# PDE(DU,U,P,T)=0
+
+# Derivated via Central Diff
+# function calculate_derivatives2(dataset)
+#     x̂, time = dataset
+#     num_points = length(x̂)
+#     # Initialize an array to store the derivative values.
+#     derivatives = similar(x̂)
+
+#     for i in 2:(num_points - 1)
+#         # Calculate the first-order derivative using central differences.
+#         Δt_forward = time[i + 1] - time[i]
+#         Δt_backward = time[i] - time[i - 1]
+
+#         derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
+
+#         derivatives[i] = derivative
+#     end
+
+#     # Derivatives at the endpoints can be calculated using forward or backward differences.
+#     derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
+#     derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
+#     return derivatives
+# end
+
+function calderivatives(prob, dataset)
+    chainflux = Flux.Chain(Flux.Dense(1, 8, tanh), Flux.Dense(8, 8, tanh),
+        Flux.Dense(8, 2)) |> Flux.f64
+    # chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64
+    function loss(x, y)
+        # sum(Flux.mse.(prob.u0[1] .+ (prob.tspan[2] .- x)' .* chainflux(x)[1, :], y[1]) +
+        #     Flux.mse.(prob.u0[2] .+ (prob.tspan[2] .- x)' .* chainflux(x)[2, :], y[2]))
+        # sum(Flux.mse.(prob.u0[1] .+ (prob.tspan[2] .- x)' .* chainflux(x)[1, :], y[1]))
+        sum(Flux.mse.(chainflux(x), y))
+    end
+    optimizer = Flux.Optimise.ADAM(0.01)
+    epochs = 3000
+    for epoch in 1:epochs
+        Flux.train!(loss,
+            Flux.params(chainflux),
+            [(dataset[end]', dataset[1:(end - 1)])],
+            optimizer)
+    end
+
+    # A1 = (prob.u0' .+
+    #   (prob.tspan[2] .- (dataset[end]' .+ sqrt(eps(eltype(Float64)))))' .*
+    #   chainflux(dataset[end]' .+ sqrt(eps(eltype(Float64))))')
+
+    # A2 = (prob.u0' .+
+    #       (prob.tspan[2] .- (dataset[end]'))' .*
+    #       chainflux(dataset[end]')')
+
+    A1 = chainflux(dataset[end]' .+ sqrt(eps(eltype(dataset[end][1]))))
+    A2 = chainflux(dataset[end]')
+
+    gradients = (A2 .- A1) ./ sqrt(eps(eltype(dataset[end][1])))
+
+    return gradients
+end
+
+function calculate_derivatives(dataset)
+
+    # u = dataset[1]
+    # u1 = dataset[2]
+    # t = dataset[end]
+    # # control points
+    # n = Int(floor(length(t) / 10))
+    # # spline for datasetvalues(solution) 
+    # # interp = BSplineApprox(u, t, 4, 10, :Uniform, :Uniform)
+    # interp = CubicSpline(u, t)
+    # interp1 = CubicSpline(u1, t)
+    # # derrivatives interpolation
+    # dx = t[2] - t[1]
+    # time = collect(t[1]:dx:t[end])
+    # smoothu = [interp(i) for i in time]
+    # smoothu1 = [interp1(i) for i in time]
+    # # derivative of the spline (must match function derivative) 
+    # û = tvdiff(smoothu, 20, 0.5, dx = dx, ε = 1)
+    # û1 = tvdiff(smoothu1, 20, 0.5, dx = dx, ε = 1)
+    # # tvdiff(smoothu, 100, 0.035, dx = dx, ε = 1)
+    # # FDM
+    # # û1 = diff(u) / dx
+    # # dataset[1] and smoothu are almost equal(rounding errors)
+    # return [û, û1] 
+
+end
+
 """
 L2 loss loglikelihood(needed for ODE parameter estimation)
 """
 function L2LossData(Tar::LogTargetDensity, θ)
+    dataset = Tar.dataset
     # check if dataset is provided
-    if Tar.dataset isa Vector{Nothing} || Tar.extraparams == 0
+    if dataset isa Vector{Nothing} || Tar.extraparams == 0
         return 0
     else
         # matrix(each row corresponds to vector u's rows)
-        nn = Tar(Tar.dataset[end], θ[1:(length(θ) - Tar.extraparams)])
+        nn = Tar(dataset[end], θ[1:(length(θ) - Tar.extraparams)])
 
         L2logprob = 0
         for i in 1:length(Tar.prob.u0)
             # for u[i] ith vector must be added to dataset,nn[1,:] is the dx in lotka_volterra
             L2logprob += logpdf(MvNormal(nn[i, :],
                     LinearAlgebra.Diagonal(map(abs2,
-                        Tar.l2std[i] .*
-                        ones(length(Tar.dataset[i]))))),
-                Tar.dataset[i])
+                        (Tar.l2std[i] * 0.5) .*
+                        ones(length(dataset[i]))))),
+                dataset[i])
         end
         return L2logprob
     end
@@ -174,6 +371,7 @@ function getlogpdf(strategy::QuadratureTraining, Tar::LogTargetDensity, f,
         innerdiff(Tar, f, autodiff, [t], θ, ode_params)
     end
     intprob = IntegralProblem(integrand, tspan[1], tspan[2], θ; nout = length(Tar.prob.u0))
+    # add dataset logpdf?
     sol = solve(intprob, QuadGKJL(); abstol = strategy.abstol, reltol = strategy.reltol)
     sum(sol.u)
 end
@@ -545,6 +743,11 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain;
     metric = Metric(nparameters)
     hamiltonian = Hamiltonian(metric, ℓπ, ForwardDiff)
 
+    println("physics Logpdf is : ", physloglikelihood(ℓπ, initial_θ))
+    println("prior Logpdf is : ", priorweights(ℓπ, initial_θ))
+    println("L2lossData Logpdf is : ", L2LossData(ℓπ, initial_θ))
+    println("L2loss2 Logpdf is : ", L2loss2(ℓπ, initial_θ))
+
     # parallel sampling option
     if nchains != 1
         # Cache to store the chains
diff --git a/test/BPINN_Tests.jl b/test/BPINN_Tests.jl
index b04483015b..fa2f04073e 100644
--- a/test/BPINN_Tests.jl
+++ b/test/BPINN_Tests.jl
@@ -109,9 +109,9 @@ u = sol1.u
 time = sol1.t
 
 # BPINN AND TRAINING DATASET CREATION(dataset must be defined only inside problem timespan!)
-ta = range(tspan[1], tspan[2], length = 100)
+ta = range(tspan[1], tspan[2], length = 25)
 u = [linear_analytic(u0, p, ti) for ti in ta]
-x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+x̂ = collect(Float64, Array(u) .+ (0.2 .* Array(u) .* randn(size(u))))
 time = vec(collect(Float64, ta))
 dataset = [x̂, time]
 physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
@@ -123,6 +123,10 @@ x̂1 = collect(Float64, Array(u1) + 0.2 * randn(size(u1)))
 time1 = vec(collect(Float64, ta0))
 physsol1_1 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
 
+using Plots, StatsPlots
+# plot(dataset[2], calderivatives(dataset)')
+yu = collect(prob.tspan[1]:(1 / 50.0):prob.tspan[2])
+plot(yu, [linear_analytic(u0, p, t) for t in yu])
 chainflux1 = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64
 chainlux1 = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1))
 init1, re1 = destructure(chainflux1)
@@ -151,13 +155,56 @@ fh_mcmc_chain2, fhsamples2, fhstats2 = ahmc_bayesian_pinn_ode(prob, chainlux1,
     n_leapfrog = 30)
 
 alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-    draw_samples = 2500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 10.0),
+    l2std = [0.005], phystd = [0.01],
+    param = [Normal(11, 6)],
     Metric = DiagEuclideanMetric,
     n_leapfrog = 30)
-
+# original paper (pure data 0 1)
+sol1flux = solve(prob, alg)
+sol1flux.estimated_ode_params
+# pure data method 1 1
 sol2flux = solve(prob, alg)
+sol2flux.estimated_ode_params
+# pure data method 1 0
+sol3flux = solve(prob, alg)
+sol3flux.estimated_ode_params
+# deri collocation
+sol4flux = solve(prob, alg)
+sol4flux.estimated_ode_params
+# collocation
+sol5flux = solve(prob, alg)
+sol5flux.estimated_ode_params
+# collocation + L2Data loss(at 9,0.5 1,2 gives same)
+sol6flux = solve(prob, alg)
+sol6flux.estimated_ode_params
+# 2500 iters
+sol7flux = solve(prob, alg)
+sol7flux.estimated_ode_params
+
+plotly()
+plot!(yu, sol1flux.ensemblesol[1])
+plot!(yu, sol2flux.ensemblesol[1])
+plot!(yu, sol3flux.ensemblesol[1])
+plot!(yu, sol4flux.ensemblesol[1])
+plot!(yu, sol5flux.ensemblesol[1])
+plot!(yu, sol6flux.ensemblesol[1])
+
+plot!(dataset[2], dataset[1])
+
+# plot!(sol4flux.ensemblesol[1])
+# plot!(sol5flux.ensemblesol[1])
+
+sol2flux.estimated_ode_params
+
+sol1flux.estimated_ode_params
+
+sol3flux.estimated_ode_params
+
+sol4flux.estimated_ode_params
+
+sol5flux.estimated_ode_params
 
 alg = NeuralPDE.BNNODE(chainlux1, dataset = dataset,
     draw_samples = 2500,
@@ -199,7 +246,7 @@ meanscurve2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
 @test mean(abs.(physsol1_1 .- sol2lux.ensemblesol[1])) < 8e-2
 
 # ESTIMATED ODE PARAMETERS (NN1 AND NN2)
-@test abs(p - sol2flux.estimated_ode_params[1]) < abs(0.15 * p)
+@test abs(p - sol1flux.estimated_ode_params[1]) < abs(0.15 * p)
 @test abs(p - sol2lux.estimated_ode_params[1]) < abs(0.15 * p)
 
 ## PROBLEM-2
@@ -230,6 +277,37 @@ chainlux12 = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh), Lux.Dense(6
 init1, re1 = destructure(chainflux12)
 θinit, st = Lux.setup(Random.default_rng(), chainlux12)
 
+using Flux
+using Random
+
+function derivatives(chainflux, dataset)
+    loss(x, y) = Flux.mse(chainflux(x), y)
+    optimizer = Flux.Optimise.ADAM(0.01)
+    epochs = 2500
+    for epoch in 1:epochs
+        Flux.train!(loss, Flux.params(chainflux), [(dataset[2]', dataset[1]')], optimizer)
+    end
+    getgradient(chainflux, dataset)
+end
+
+function getgradient(chainflux, dataset)
+    return (chainflux(dataset[end]' .+ sqrt(eps(eltype(Float64)))) .-
+            chainflux(dataset[end]')) ./
+           sqrt(eps(eltype(dataset[end][1])))
+end
+
+ans = derivatives(chainflux12, dataset)
+
+init3, re = destructure(chainflux12)
+init2 == init1
+init3 == init2
+plot!(dataset[end], ans')
+plot!(dataset[end], chainflux12(dataset[end]')')
+
+ars = getgradient(chainflux12, dataset)
+
+plot!(dataset[end], ars')
+
 fh_mcmc_chainflux12, fhsamplesflux12, fhstatsflux12 = ahmc_bayesian_pinn_ode(prob,
     chainflux12,
     draw_samples = 1500,
@@ -277,10 +355,10 @@ fh_mcmc_chainlux22, fhsampleslux22, fhstatslux22 = ahmc_bayesian_pinn_ode(prob,
     ],
     n_leapfrog = 30)
 
-alg = NeuralPDE.BNNODE(chainflux12,
+alg1 = NeuralPDE.BNNODE(chainflux12,
     dataset = dataset,
-    draw_samples = 1500,
-    l2std = [0.03],
+    draw_samples = 500,
+    l2std = [0.01],
     phystd = [
         0.03,
     ],
@@ -290,10 +368,50 @@ alg = NeuralPDE.BNNODE(chainflux12,
         Normal(-7,
             4),
     ],
-    n_leapfrog = 30)
+    n_leapfrog = 30, progress = true)
+
+# original paper (pure data 0 1)
+sol1flux_pestim = solve(prob, alg1)
+sol1flux_pestim.estimated_ode_params
+# pure data method 1 1
+sol2flux_pestim = solve(prob, alg1)
+sol2flux_pestim.estimated_ode_params
+# pure data method 1 0
+sol3flux_pestim = solve(prob, alg1)
+sol3flux_pestim.estimated_ode_params
+# deri collocation
+sol4flux_pestim = solve(prob, alg1)
+sol4flux_pestim.estimated_ode_params
+# collocation
+sol5flux_pestim = solve(prob, alg1)
+sol5flux_pestim.estimated_ode_params
+# collocation + L2Data loss(at 9,0.5 1,2 gives same)
+sol6flux_pestim = solve(prob, alg1)
+sol6flux_pestim.estimated_ode_params
+
+using Plots, StatsPlots
+ars = collect(prob.tspan[1]:(1 / 50.0):prob.tspan[2])
+plot(time, u)
+plot!(ars, sol1flux_pestim.ensemblesol[1])
+plot!(ars, sol2flux_pestim.ensemblesol[1])
+plot!(ars, sol3flux_pestim.ensemblesol[1])
+plot!(ars, sol4flux_pestim.ensemblesol[1])
+plot!(ars, sol5flux_pestim.ensemblesol[1])
+plot!(ars, sol6flux_pestim.ensemblesol[1])
+
+sol3flux_pestim.estimated_ode_params
+
+sol4flux_pestim.estimated_ode_params
+
+sol5flux_pestim.estimated_ode_params
 
-sol3flux_pestim = solve(prob, alg)
+sol6flux_pestim.estimated_ode_params
 
+ars = collect(prob.tspan[1]:(1 / 50.0):prob.tspan[2])
+
+init, re1 = destructure(chainflux12)
+init
+init1
 alg = NeuralPDE.BNNODE(chainlux12,
     dataset = dataset,
     draw_samples = 1500,
@@ -363,4 +481,3874 @@ param1 = sol3flux_pestim.estimated_ode_params[1]
 @test mean(abs.(physsol2 .- sol3lux_pestim.ensemblesol[1])) < 0.15
 # estimated parameters(lux chain)
 param1 = sol3lux_pestim.estimated_ode_params[1]
-@test abs(param1 - p) < abs(0.45 * p)
\ No newline at end of file
+@test abs(param1 - p) < abs(0.45 * p)
+
+using Plots, StatsPlots
+using NoiseRobustDifferentiation, Weave, DataInterpolations
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood
+# # 25 points 
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+#     draw_samples = 1500, physdt = 1 / 50.0f0, phystd = [0.01],
+#     l2std = [0.01],
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux1 = solve(prob, alg)
+# sol2flux1.estimated_ode_params[1] #6.41722 Particles{Float64, 1}, 6.02404 Particles{Float64, 1}
+# sol2flux2 = solve(prob, alg)
+# sol2flux2.estimated_ode_params[1] #6.42782 Particles{Float64, 1}, 6.07509 Particles{Float64, 1}
+# sol2flux3 = solve(prob, alg)
+# sol2flux3.estimated_ode_params[1] #6.42782 Particles{Float64, 1}, 6.00825 Particles{Float64, 1}
+
+# # 50 points 
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux11 = solve(prob, alg)
+# sol2flux11.estimated_ode_params[1] #5.71268 Particles{Float64, 1}, 6.07242 Particles{Float64, 1}
+# sol2flux22 = solve(prob, alg)
+# sol2flux22.estimated_ode_params[1] #5.74599 Particles{Float64, 1}, 6.04837 Particles{Float64, 1}
+# sol2flux33 = solve(prob, alg)
+# sol2flux33.estimated_ode_params[1] #5.74599 Particles{Float64, 1}, 6.02838 Particles{Float64, 1}
+
+# # 100 points 
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux111 = solve(prob, alg)
+# sol2flux111.estimated_ode_params[1] #6.59097 Particles{Float64, 1}, 5.89384 Particles{Float64, 1}
+# sol2flux222 = solve(prob, alg)
+# sol2flux222.estimated_ode_params[1] #6.62813 Particles{Float64, 1}, 5.88216 Particles{Float64, 1}
+# sol2flux333 = solve(prob, alg)
+# sol2flux333.estimated_ode_params[1] #6.62813 Particles{Float64, 1}, 5.85327 Particles{Float64, 1}
+
+# # ----------------------------------------------------------
+# # physdt=1/20, full likelihood cdm
+# # 25 points 
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux1_cdm = solve(prob, alg)
+# sol2flux1_cdm.estimated_ode_params[1]# 6.50506 Particles{Float64, 1} ,6.38963 Particles{Float64, 1}
+# sol2flux2_cdm = solve(prob, alg)
+# sol2flux2_cdm.estimated_ode_params[1] #6.50032 Particles{Float64, 1} ,6.39817 Particles{Float64, 1}
+# sol2flux3_cdm = solve(prob, alg)
+# sol2flux3_cdm.estimated_ode_params[1] #6.50032 Particles{Float64, 1} ,6.36296 Particles{Float64, 1}
+
+# # 50 points 
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux11_cdm = solve(prob, alg)
+# sol2flux11_cdm.estimated_ode_params[1] #6.52951 Particles{Float64, 1},5.15621 Particles{Float64, 1}
+# sol2flux22_cdm = solve(prob, alg)
+# sol2flux22_cdm.estimated_ode_params[1] #6.54988 Particles{Float64, 1},5.16363 Particles{Float64, 1}
+# sol2flux33_cdm = solve(prob, alg)
+# sol2flux33_cdm.estimated_ode_params[1] #6.54988 Particles{Float64, 1},5.15591 Particles{Float64, 1}
+
+# # 100 points 
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux111_cdm = solve(prob, alg)
+# sol2flux111_cdm.estimated_ode_params[1] #6.74338 Particles{Float64, 1}, 9.72422 Particles{Float64, 1}
+# sol2flux222_cdm = solve(prob, alg)
+# sol2flux222_cdm.estimated_ode_params[1] #6.72642 Particles{Float64, 1}, 9.71991 Particles{Float64, 1}
+# sol2flux333_cdm = solve(prob, alg)
+# sol2flux333_cdm.estimated_ode_params[1] #6.72642 Particles{Float64, 1}, 9.75045 Particles{Float64, 1}
+
+# --------------------------------------------------------------------------------------
+#                              NEW SERIES OF TESTS (IN ORDER OF EXECUTION)
+#  -------------------------------------------------------------------------------------
+# original paper implementaion
+# 25 points
+ta = range(tspan[1], tspan[2], length = 25)
+u = [linear_analytic(u0, p, ti) for ti in ta]
+x̂ = collect(Float64, u .+ 0.05 * randn(size(u)))
+time = vec(collect(Float64, ta))
+dataset1 = [x̂, time]
+physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+# scatter!(time, u)
+# dataset
+# scatter!(dataset1[2], dataset1[1])
+# plot(time, physsol1)
+
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux1_normal = solve(prob, alg)
+sol2flux1_normal.estimated_ode_params[1]  #7.70593 Particles{Float64, 1}, 6.36096 Particles{Float64, 1} | 6.45865 Particles{Float64, 1}
+sol2flux2_normal = solve(prob, alg)
+sol2flux2_normal.estimated_ode_params[1] #6.66347 Particles{Float64, 1}, 6.36974 Particles{Float64, 1} | 6.45865 Particles{Float64, 1}
+sol2flux3_normal = solve(prob, alg)
+sol2flux3_normal.estimated_ode_params[1] #6.84827 Particles{Float64, 1}, 6.29555 Particles{Float64, 1} | 6.39947 Particles{Float64, 1}
+
+# 50 points
+ta = range(tspan[1], tspan[2], length = 50)
+u = [linear_analytic(u0, p, ti) for ti in ta]
+x̂ = collect(Float64, Array(u) + 0.05 * randn(size(u)))
+time = vec(collect(Float64, ta))
+dataset2 = [x̂, time]
+physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux11_normal = solve(prob, alg)
+sol2flux11_normal.estimated_ode_params[1] #7.83577 Particles{Float64, 1},6.24652 Particles{Float64, 1} | 6.34495 Particles{Float64, 1}
+sol2flux22_normal = solve(prob, alg)
+sol2flux22_normal.estimated_ode_params[1] #6.49477 Particles{Float64, 1},6.2118 Particles{Float64, 1} | 6.32476 Particles{Float64, 1}
+sol2flux33_normal = solve(prob, alg)
+sol2flux33_normal.estimated_ode_params[1] #6.47421 Particles{Float64, 1},6.33687 Particles{Float64, 1} | 6.2448 Particles{Float64, 1}
+
+# 100 points
+ta = range(tspan[1], tspan[2], length = 100)
+u = [linear_analytic(u0, p, ti) for ti in ta]
+x̂ = collect(Float64, Array(u) + 0.05 * randn(size(u)))
+time = vec(collect(Float64, ta))
+dataset3 = [x̂, time]
+physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux111_normal = solve(prob, alg)
+sol2flux111_normal.estimated_ode_params[1] #5.96604 Particles{Float64, 1},5.99588 Particles{Float64, 1} | 6.19805 Particles{Float64, 1}
+sol2flux222_normal = solve(prob, alg)
+sol2flux222_normal.estimated_ode_params[1] #6.05432 Particles{Float64, 1},6.0768 Particles{Float64, 1} | 6.22948 Particles{Float64, 1}
+sol2flux333_normal = solve(prob, alg)
+sol2flux333_normal.estimated_ode_params[1] #6.08856 Particles{Float64, 1},5.94819 Particles{Float64, 1} | 6.2551 Particles{Float64, 1}
+
+# LOTKA VOLTERRA CASE 
+function lotka_volterra(u, p, t)
+    # Model parameters.
+    α, β, γ, δ = p
+    # Current state.
+    x, y = u
+
+    # Evaluate differential equations.
+    dx = (α - β * y) * x # prey
+    dy = (δ * x - γ) * y # predator
+
+    return [dx, dy]
+end
+
+# initial-value problem.
+u01 = [1.0, 1.0]
+p1 = [1.5, 1.0, 3.0, 1.0]
+tspan1 = (0.0, 6.0)
+prob1 = ODEProblem(lotka_volterra, u01, tspan1, p1)
+
+# chainlux = Lux.Chain(Lux.Dense(1, 7, Lux.tanh), Lux.Dense(7, 7, Lux.tanh), Lux.Dense(7, 2))
+chainflux1 = Flux.Chain(Flux.Dense(1, 8, tanh), Flux.Dense(8, 8, tanh), Flux.Dense(8, 2))
+
+#testing timepoints must match keyword arg `saveat`` timepoints of solve() call
+t1 = collect(Float64, prob1.tspan[1]:(1 / 50.0):prob1.tspan[2])
+
+# --------------------------------------------------------------------------
+# original paper implementaion lotka volterra
+# 31 points  
+solution1 = solve(prob1, Tsit5(); saveat = 0.1)
+time1 = solution1.t
+physsol1_1 = solution1.u
+u1 = hcat(solution1.u...)
+x1 = u1[1, :] .+ 0.3 .* u1[1, :] .* randn(length(u1[1, :]))
+y1 = u1[2, :] .+ 0.3 .* u1[2, :] .* randn(length(u1[2, :]))
+dataset2_1 = [x1, y1, time1]
+plot(dataset2_1[end], dataset2_1[1])
+plot!(dataset2_1[end], dataset2_1[2])
+plot!(time1, u1[1, :])
+plot!(time1, u1[2, :])
+
+alg1 = NeuralPDE.BNNODE(chainflux1,
+    dataset = dataset2_1,
+    draw_samples = 1000,
+    physdt = 1 / 20.0,
+    l2std = [
+        0.2,
+        0.2,
+    ],
+    phystd = [
+        0.5,
+        0.5,
+    ],
+    priorsNNw = (0.0,
+        10.0),
+    param = [
+        Normal(4,
+            3),
+        Normal(-2,
+            4),
+        Normal(0,
+            5),
+        Normal(2.5,
+            2)],
+    n_leapfrog = 30, progress = true)
+
+# original paper (pure data 0 1)
+sol1flux1_lotka = solve(prob1, alg1)
+sol1flux1_lotka.estimated_ode_params
+# pure data method 1 1
+sol2flux1_lotka = solve(prob1, alg1)
+sol2flux1_lotka.estimated_ode_params
+# pure data method 1 0
+sol3flux1_lotka = solve(prob1, alg1)
+sol3flux1_lotka.estimated_ode_params
+# deri collocation
+sol4flux1_lotka = solve(prob1, alg1)
+sol4flux1_lotka.estimated_ode_params
+# collocation
+sol5flux1_lotka = solve(prob1, alg1)
+sol5flux1_lotka.estimated_ode_params
+# collocation + L2Data loss(at 9,0.5 1,2 gives same)
+sol6flux1_lotka = solve(prob1, alg1)
+sol6flux1_lotka.estimated_ode_params
+
+sol7flux1_lotka = solve(prob1, alg1)
+sol7flux1_lotka.estimated_ode_params
+
+using Plots, StatsPlots
+plot(dataset2_1[3], u1[1, :])
+plot!(dataset2_1[3], u1[2, :])
+plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol5flux1_normal.ensemblesol[2])
+plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]),
+    sol1flux1_normal.ensemblesol[1],
+    legend = :outerbottomleft)
+sol1flux2_normal = solve(prob1, alg1)
+sol1flux2_normal.estimated_ode_params  #|
+sol1flux3_normal = solve(prob1, alg1)
+sol1flux3_normal.estimated_ode_params  #|
+sol1flux4_normal = solve(prob1, alg1)
+sol1flux4_normal.estimated_ode_params
+
+plotly()
+plot!(title = "yuh")
+plot!(dataset2_1[3], dataset2_1[1])
+plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol1flux1_normal.ensemblesol[1])
+plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol1flux2_normal.ensemblesol[1])
+plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol1flux3_normal.ensemblesol[2])
+plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol1flux4_normal.ensemblesol[1])
+plot(time1, u1[1, :])
+plot!(time1, u1[2, :])
+
+ars = chainflux1(dataset2_1[end]')
+plot(ars[1, :])
+plot!(ars[2, :])
+
+function calculate_derivatives(dataset)
+    u = dataset[1]
+    u1 = dataset[2]
+    t = dataset[end]
+    # control points
+    n = Int(floor(length(t) / 10))
+    # spline for datasetvalues(solution) 
+    # interp = BSplineApprox(u, t, 4, 10, :Uniform, :Uniform)
+    interp = CubicSpline(u, t)
+    interp1 = CubicSpline(u1, t)
+    # derrivatives interpolation
+    dx = t[2] - t[1]
+    time = collect(t[1]:dx:t[end])
+    smoothu = [interp(i) for i in time]
+    smoothu1 = [interp1(i) for i in time]
+    # derivative of the spline (must match function derivative) 
+    û = tvdiff(smoothu, 20, 0.5, dx = dx, ε = 1)
+    û1 = tvdiff(smoothu1, 20, 0.5, dx = dx, ε = 1)
+    # tvdiff(smoothu, 100, 0.035, dx = dx, ε = 1)
+    # FDM
+    # û1 = diff(u) / dx
+    # dataset[1] and smoothu are almost equal(rounding errors)
+    return û, û1
+    # return 1
+end
+
+ar = calculate_derivatives(dataset2_1)
+plot(ar[1])
+plot!(ar[2])
+
+# 61 points
+solution1 = solve(prob1, Tsit5(); saveat = 0.1)
+time1 = solution1.t
+physsol1_1 = solution1.u
+u1 = hcat(solution1.u...)
+x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+dataset2_2 = [x1, y1, time1]
+
+alg1 = NeuralPDE.BNNODE(chainlux,
+    dataset = dataset2_2,
+    draw_samples = 1000,
+    l2std = [
+        0.1,
+        0.1,
+    ],
+    phystd = [
+        0.1,
+        0.1,
+    ],
+    priorsNNw = (0.0,
+        5.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol1flux11_normal = solve(prob1, alg1)
+sol1flux11_normal.estimated_ode_params #|
+sol1flux22_normal = solve(prob1, alg1)
+sol1flux22_normal.estimated_ode_params #|
+sol1flux33_normal = solve(prob1, alg1)
+sol1flux33_normal.estimated_ode_params #|
+
+# 121 points
+solution1 = solve(prob1, Tsit5(); saveat = 0.05)
+time1 = solution1.t
+physsol1_1 = solution1.u
+u1 = hcat(solution1.u...)
+x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+dataset2_3 = [x1, y1, time1]
+
+alg1 = NeuralPDE.BNNODE(chainlux,
+    dataset = dataset2_3,
+    draw_samples = 1000,
+    l2std = [
+        0.1,
+        0.1,
+    ],
+    phystd = [
+        0.1,
+        0.1,
+    ],
+    priorsNNw = (0.0,
+        5.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol1flux111_normal = solve(prob1, alg1)
+sol1flux111_normal.estimated_ode_params #|
+sol1flux222_normal = solve(prob1, alg1)
+sol1flux222_normal.estimated_ode_params #|
+sol1flux333_normal = solve(prob1, alg1)
+sol1flux333_normal.estimated_ode_params #| 
+
+# -------------------------------------------------------------------- 
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -640.4155412187399
+# Sampling 100%|███████████████████████████████| Time: 0:02:30
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -640.4155412187399
+# Sampling 100%|███████████████████████████████| Time: 0:01:54
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -640.4155412187399
+# Sampling 100%|███████████████████████████████| Time: 0:01:59
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1198.9147562830894
+# Sampling 100%|███████████████████████████████| Time: 0:02:44
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1198.9147562830894
+# Sampling 100%|███████████████████████████████| Time: 0:02:41
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1198.9147562830894
+# Sampling 100%|███████████████████████████████| Time: 0:02:41
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -2473.741390504424
+# Sampling 100%|███████████████████████████████| Time: 0:03:52
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -2473.741390504424
+# Sampling 100%|███████████████████████████████| Time: 0:03:49
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -2473.741390504424
+# Sampling 100%|███████████████████████████████| Time: 0:03:50
+
+# # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
+# physics Logpdf is : -6.659143464386241e7
+# prior Logpdf is : -150.30074579848434
+# L2lossData Logpdf is : -6.03075717462954e6
+# Sampling 100%|███████████████████████████████| Time: 0:04:54
+
+# physics Logpdf is : -8.70012053004202e8
+# prior Logpdf is : -150.3750892952511
+# L2lossData Logpdf is : -6.967914805207133e6
+# Sampling 100%|███████████████████████████████| Time: 0:05:09
+
+# physics Logpdf is : -5.417241281343099e7
+# prior Logpdf is : -150.52079555737976
+# L2lossData Logpdf is : -4.195953436792884e6
+# Sampling 100%|███████████████████████████████| Time: 0:05:01
+
+# physics Logpdf is : -4.579552981943833e8
+# prior Logpdf is : -150.30491731974283
+# L2lossData Logpdf is : -8.595475827260146e6
+# Sampling 100%|███████████████████████████████| Time: 0:06:08
+
+# physics Logpdf is : -1.989281834955769e7
+# prior Logpdf is : -150.16009042727543
+# L2lossData Logpdf is : -1.121270659669029e7
+# Sampling 100%|███████████████████████████████| Time: 0:05:38
+
+# physics Logpdf is : -8.683829147264534e8
+# prior Logpdf is : -150.37824872259102
+# L2lossData Logpdf is : -1.0887662888035845e7
+# Sampling 100%|███████████████████████████████| Time: 0:05:50
+
+# physics Logpdf is : -3.1944760610332566e8
+# prior Logpdf is : -150.33610348737565
+# L2lossData Logpdf is : -1.215458786744478e7
+# Sampling 100%|███████████████████████████████| Time: 0:10:50
+
+# physics Logpdf is : -3.2884572300341567e6
+# prior Logpdf is : -150.21002268156343
+# L2lossData Logpdf is : -1.102536731511176e7
+# Sampling 100%|███████████████████████████████| Time: 0:09:53
+
+# physics Logpdf is : -5.31293521002414e8
+# prior Logpdf is : -150.20948536040126
+# L2lossData Logpdf is : -1.818717239584132e7
+# Sampling 100%|███████████████████████████████| Time: 0:08:53
+
+# ----------------------------------------------------------
+# Full likelihood no l2 only new L22(NN gradients)
+# 25 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux1_new = solve(prob, alg)
+sol2flux1_new.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} | 6.21662 Particles{Float64, 1}
+sol2flux2_new = solve(prob, alg)
+sol2flux2_new.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1} | 7.14238 Particles{Float64, 1}
+sol2flux3_new = solve(prob, alg)
+sol2flux3_new.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1} | 6.79159 Particles{Float64, 1}
+
+# 50 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux11_new = solve(prob, alg)
+sol2flux11_new.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1} | 5.33467 Particles{Float64, 1}
+sol2flux22_new = solve(prob, alg)
+sol2flux22_new.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1} | 6.52419 Particles{Float64, 1}
+sol2flux33_new = solve(prob, alg)
+sol2flux33_new.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} | 5.36921 Particles{Float64, 1}
+
+# 100 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux111_new = solve(prob, alg)
+sol2flux111_new.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1} | 6.45333 Particles{Float64, 1}
+sol2flux222_new = solve(prob, alg)
+sol2flux222_new.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1} | 4.64417 Particles{Float64, 1}
+sol2flux333_new = solve(prob, alg)
+sol2flux333_new.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1} | 5.88037 Particles{Float64, 1}
+# ---------------------------------------------------------------------------
+
+# ----------------------------------------------------------
+# Full likelihood  l2 + new L22(NN gradients)
+# 25 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux1_new_all = solve(prob, alg)
+sol2flux1_new_all.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} | 6.4358 Particles{Float64, 1}
+sol2flux2_new_all = solve(prob, alg)
+sol2flux2_new_all.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1} | 6.52449 Particles{Float64, 1}
+sol2flux3_new_all = solve(prob, alg)
+sol2flux3_new_all.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1} | 6.34188 Particles{Float64, 1}
+
+# 50 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux11_new_all = solve(prob, alg)
+sol2flux11_new_all.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1} | 6.37889 Particles{Float64, 1}
+sol2flux22_new_all = solve(prob, alg)
+sol2flux22_new_all.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1} | 6.34747 Particles{Float64, 1}
+sol2flux33_new_all = solve(prob, alg)
+sol2flux33_new_all.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} | 6.39699 Particles{Float64, 1}
+
+# 100 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux111_new_all = solve(prob, alg)
+sol2flux111_new_all.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1} | 6.24327 Particles{Float64, 1}
+sol2flux222_new_all = solve(prob, alg)
+sol2flux222_new_all.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1} | 6.23928 Particles{Float64, 1}
+sol2flux333_new_all = solve(prob, alg)
+sol2flux333_new_all.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1} | 6.2145 Particles{Float64, 1}
+
+# ---------------------------------------------------------------------------
+# Full likelihood  l2 + new L22(dataset gradients) lotka volterra
+# 36 points 
+alg1 = NeuralPDE.BNNODE(chainlux,
+    dataset = dataset2_1,
+    draw_samples = 1000,
+    l2std = [
+        0.01,
+        0.01,
+    ],
+    phystd = [
+        0.01,
+        0.01,
+    ],
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol1flux1_new_all = solve(prob1, alg1)
+sol1flux1_new_all.estimated_ode_params[1]  #|
+sol1flux2_new_all = solve(prob1, alg1)
+sol1flux2_new_all.estimated_ode_params[1] #|
+sol1flux3_new_all = solve(prob1, alg1)
+sol1flux3_new_all.estimated_ode_params[1] #|
+
+# 61 points 
+alg1 = NeuralPDE.BNNODE(chainlux,
+    dataset = dataset2_2,
+    draw_samples = 1000,
+    l2std = [
+        0.01,
+        0.01,
+    ],
+    phystd = [
+        0.01,
+        0.01,
+    ],
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol1flux11_new_all = solve(prob1, alg1)
+sol1flux11_new_all.estimated_ode_params[1] #|
+sol1flux22_new_all = solve(prob1, alg1)
+sol1flux22_new_all.estimated_ode_params[1] #|
+sol1flux33_new_all = solve(prob1, alg1)
+sol1flux33_new_all.estimated_ode_params[1] #|
+
+# 121 points 
+alg1 = NeuralPDE.BNNODE(chainlux,
+    dataset = dataset2_3,
+    draw_samples = 1000,
+    l2std = [
+        0.01,
+        0.01,
+    ],
+    phystd = [
+        0.01,
+        0.01,
+    ],
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol1flux111_new_all = solve(prob1, alg1)
+sol1flux111_new_all.estimated_ode_params[1] #|
+sol1flux222_new_all = solve(prob1, alg1)
+sol1flux222_new_all.estimated_ode_params[1] #|
+sol1flux333_new_all = solve(prob1, alg1)
+sol1flux333_new_all.estimated_ode_params[1] #|
+# -------------------------------------------------------------------- 
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -640.4155412187399
+# L2loss2 Logpdf is : -757.9047847584478
+# Sampling 100%|███████████████████████████████| Time: 0:02:32
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -640.4155412187399
+# L2loss2 Logpdf is : -757.9047847584478
+# Sampling 100%|███████████████████████████████| Time: 0:02:19
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -640.4155412187399
+# L2loss2 Logpdf is : -757.9047847584478
+# Sampling 100%|███████████████████████████████| Time: 0:02:31
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1198.9147562830894
+# L2loss2 Logpdf is : -1517.3653615845183
+# Sampling 100%|███████████████████████████████| Time: 0:03:45
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1198.9147562830894
+# L2loss2 Logpdf is : -1517.3653615845183
+# Sampling 100%|███████████████████████████████| Time: 0:03:20
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1198.9147562830894
+# L2loss2 Logpdf is : -1517.3653615845183
+# Sampling 100%|███████████████████████████████| Time: 0:03:20
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -2473.741390504424
+# L2loss2 Logpdf is : -3037.8868319811254
+# Sampling 100%|███████████████████████████████| Time: 0:04:57
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -2473.741390504424
+# L2loss2 Logpdf is : -3037.8868319811254
+# Sampling 100%|███████████████████████████████| Time: 0:05:26
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -2473.741390504424
+# L2loss2 Logpdf is : -3037.8868319811254
+# Sampling 100%|███████████████████████████████| Time: 0:05:01
+
+# ----------------------------------------------------------
+# Full likelihood  l2 + new L22(dataset gradients)
+# 25 points
+# 1*,2*,  
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux1_newdata_all = solve(prob, alg)
+sol2flux1_newdata_all.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} | 5.73072 Particles{Float64, 1}
+sol2flux2_newdata_all = solve(prob, alg)
+sol2flux2_newdata_all.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1} | 5.71597 Particles{Float64, 1}
+sol2flux3_newdata_all = solve(prob, alg)
+sol2flux3_newdata_all.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1} | 5.7313 Particles{Float64, 1}
+
+# 50 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux11_newdata_all = solve(prob, alg)
+sol2flux11_newdata_all.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1} | 6.07153 Particles{Float64, 1}
+sol2flux22_newdata_all = solve(prob, alg)
+sol2flux22_newdata_all.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1} | 6.06623 Particles{Float64, 1}
+sol2flux33_newdata_all = solve(prob, alg)
+sol2flux33_newdata_all.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} | 6.12748 Particles{Float64, 1}
+
+# 100 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux111_newdata_all = solve(prob, alg)
+sol2flux111_newdata_all.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1} | 6.26222 Particles{Float64, 1}
+sol2flux222_newdata_all = solve(prob, alg)
+sol2flux222_newdata_all.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1} | 5.86494 Particles{Float64, 1}
+sol2flux333_newdata_all = solve(prob, alg)
+sol2flux333_newdata_all.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1} |  
+
+# ---------------------------------------------------------------------------
+
+# LOTKA VOLTERRA CASE
+using Plots, StatsPlots
+function lotka_volterra(u, p, t)
+    # Model parameters.
+    α, β, γ, δ = p
+    # Current state.
+    x, y = u
+
+    # Evaluate differential equations.
+    dx = (α - β * y) * x # prey
+    dy = (δ * x - γ) * y # predator
+
+    return [dx, dy]
+end
+
+# initial-value problem.
+u01 = [1.0, 1.0]
+p1 = [1.5, 1.0, 3.0, 1.0]
+tspan1 = (0.0, 6.0)
+prob1 = ODEProblem(lotka_volterra, u01, tspan1, p1)
+
+chainlux = Lux.Chain(Lux.Dense(1, 6, Lux.tanh), Lux.Dense(6, 6, Lux.tanh), Lux.Dense(6, 2))
+
+#testing timepoints must match keyword arg `saveat`` timepoints of solve() call
+t1 = collect(Float64, prob.tspan[1]:(1 / 50.0):prob.tspan[2])
+
+# --------------------------------------------------------------------------
+# original paper implementaion
+# 25 points  
+solution1 = solve(prob1, Tsit5(); saveat = 0.2)
+time1 = solution1.t
+physsol1_1 = solution1.u
+u1 = hcat(solution1.u...)
+x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+dataset2_1 = [x1, y1, time1]
+
+plot(time1, u1[1, :])
+plot!(time1, u1[2, :])
+scatter!(dataset2_1[3], dataset2_1[1])
+scatter!(dataset2_1[3], dataset2_1[2])
+
+alg1 = NeuralPDE.BNNODE(chainlux,
+    dataset = dataset2_1,
+    draw_samples = 1000,
+    l2std = [
+        0.01,
+        0.01,
+    ],
+    phystd = [
+        0.01,
+        0.01,
+    ],
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol1flux1_normal = solve(prob1, alg1)
+sol1flux1_normal.estimated_ode_params[1]  #|
+sol1flux2_normal = solve(prob1, alg1)
+sol1flux2_normal.estimated_ode_params[1] #|
+sol1flux3_normal = solve(prob1, alg1)
+sol1flux3_normal.estimated_ode_params[1] #|
+
+# 50 points
+solution1 = solve(prob1, Tsit5(); saveat = 0.05)
+time1 = solution1.t
+physsol1_1 = solution1.u
+u1 = hcat(solution1.u...)
+x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+dataset2_2 = [x1, y1, time1]
+
+alg1 = NeuralPDE.BNNODE(chainlux,
+    dataset = dataset2_2,
+    draw_samples = 1000,
+    l2std = [
+        0.01,
+        0.01,
+    ],
+    phystd = [
+        0.01,
+        0.01,
+    ],
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol1flux11_normal = solve(prob1, alg1)
+sol1flux11_normal.estimated_ode_params[1] #|
+sol1flux22_normal = solve(prob1, alg1)
+sol1flux22_normal.estimated_ode_params[1] #|
+sol1flux33_normal = solve(prob1, alg1)
+sol1flux33_normal.estimated_ode_params[1] #|
+
+# 100 points
+solution = solve(prob1, Tsit5(); saveat = 0.05)
+time1 = solution1.t
+physsol1_1 = solution1.u
+u1 = hcat(solution1.u...)
+x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+dataset2_3 = [x1, y1, time1]
+
+alg1 = NeuralPDE.BNNODE(chainlux,
+    dataset = dataset2_3,
+    draw_samples = 1000,
+    l2std = [
+        0.01,
+        0.01,
+    ],
+    phystd = [
+        0.01,
+        0.01,
+    ],
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol1flux111_normal = solve(prob1, alg1)
+sol1flux111_normal.estimated_ode_params[1] #|
+sol1flux222_normal = solve(prob1, alg1)
+sol1flux222_normal.estimated_ode_params[1] #|
+sol1flux333_normal = solve(prob1, alg1)
+sol1flux333_normal.estimated_ode_params[1] #|
+
+# --------------------------------------------------------------------
+
+# ----------------------------------------------------------
+# Full likelihood no l2 only new L22(NN gradients)
+# 25 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux1_new = solve(prob, alg)
+sol2flux1_new.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} |
+sol2flux2_new = solve(prob, alg)
+sol2flux2_new.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1}   |
+sol2flux3_new = solve(prob, alg)
+sol2flux3_new.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1}  |
+
+# 50 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux11_new = solve(prob, alg)
+sol2flux11_new.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1}   |
+sol2flux22_new = solve(prob, alg)
+sol2flux22_new.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1}   |
+sol2flux33_new = solve(prob, alg)
+sol2flux33_new.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} |
+
+# 100 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux111_new = solve(prob, alg)
+sol2flux111_new.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1}   |
+sol2flux222_new = solve(prob, alg)
+sol2flux222_new.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1}     |
+sol2flux333_new = solve(prob, alg)
+sol2flux333_new.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1}   |
+# ---------------------------------------------------------------------------
+
+# ----------------------------------------------------------
+# Full likelihood  l2 + new L22(NN gradients)
+# 25 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux1_new_all = solve(prob, alg)
+sol2flux1_new_all.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1}  |
+sol2flux2_new_all = solve(prob, alg)
+sol2flux2_new_all.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1}    |
+sol2flux3_new_all = solve(prob, alg)
+sol2flux3_new_all.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1}   |
+
+# 50 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux11_new_all = solve(prob, alg)
+sol2flux11_new_all.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1}   |
+sol2flux22_new_all = solve(prob, alg)
+sol2flux22_new_all.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1}   |
+sol2flux33_new_all = solve(prob, alg)
+sol2flux33_new_all.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} |
+# 100 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux111_new_all = solve(prob, alg)
+sol2flux111_new_all.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1}  |
+sol2flux222_new_all = solve(prob, alg)
+sol2flux222_new_all.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1}    |
+sol2flux333_new_all = solve(prob, alg)
+sol2flux333_new_all.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1}  |
+
+# ---------------------------------------------------------------------------
+
+# ----------------------------------------------------------
+# Full likelihood  l2 + new L22(dataset gradients)
+# 25 points 
+# *1,*2 vs *2.5
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol1flux1_newdata_all = solve(prob, alg)
+sol1flux1_newdata_all.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} |
+sol1flux2_newdata_all = solve(prob, alg)
+sol1flux2_newdata_all.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1}   |
+sol1flux3_newdata_all = solve(prob, alg)
+sol1flux3_newdata_all.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1}  |
+
+# 50 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol1flux11_newdata_all = solve(prob, alg)
+sol1flux11_newdata_all.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1}    |
+sol1flux22_newdata_all = solve(prob, alg)
+sol1flux22_newdata_all.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1}    |
+sol1flux33_newdata_all = solve(prob, alg)
+sol1flux33_newdata_all.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1}   |
+
+# 100 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol1flux111_newdata_all = solve(prob, alg)
+sol1flux111_newdata_all.estimated_ode_params[1]  #|
+sol1flux222_newdata_all = solve(prob, alg)
+sol1flux222_newdata_all.estimated_ode_params[1]  #|
+sol1flux333_newdata_all = solve(prob, alg)
+sol1flux333_newdata_all.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1}   |
+
+# ------------------------------------------------------------------------------------------------------------------------------
+
+# sol2flux111.estimated_ode_params[1]
+# # mine *5
+# 7.03386Particles{Float64, 1}
+# # normal
+# 6.38951Particles{Float64, 1}
+# 6.67657Particles{Float64, 1}
+# # mine *10
+# 7.53672Particles{Float64, 1}
+# # mine *2
+# 6.29005Particles{Float64, 1}
+# 6.29844Particles{Float64, 1}
+
+# # new mine *2
+# 6.39008Particles{Float64, 1}
+# 6.22071Particles{Float64, 1}
+# 6.15611Particles{Float64, 1}
+
+# # new mine *2 tvdiff(smoothu, 20, 0.035, dx = dx, ε = 1e-2)
+# 6.25549Particles{Float64, 1}
+# ----------------------------------------------------------
+
+# ---------------------------------------------------
+
+function calculate_derivatives1(dataset)
+    x̂, time = dataset
+    num_points = length(x̂)
+    # Initialize an array to store the derivative values.
+    derivatives = similar(x̂)
+
+    for i in 2:(num_points - 1)
+        # Calculate the first-order derivative using central differences.
+        Δt_forward = time[i + 1] - time[i]
+        Δt_backward = time[i] - time[i - 1]
+
+        derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
+
+        derivatives[i] = derivative
+    end
+
+    # Derivatives at the endpoints can be calculated using forward or backward differences.
+    derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
+    derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
+    return derivatives
+end
+
+function calculate_derivatives2(dataset)
+    u = dataset[1]
+    t = dataset[2]
+    # control points
+    n = Int(floor(length(t) / 10))
+    # spline for datasetvalues(solution) 
+    # interp = BSplineApprox(u, t, 4, 10, :Uniform, :Uniform)
+    interp = CubicSpline(u, t)
+    # derrivatives interpolation
+    dx = t[2] - t[1]
+    time = collect(t[1]:dx:t[end])
+    smoothu = [interp(i) for i in time]
+    # derivative of the spline (must match function derivative) 
+    û = tvdiff(smoothu, 20, 0.03, dx = dx, ε = 1)
+    # tvdiff(smoothu, 100, 0.1, dx = dx)
+    # 
+    # 
+    # FDM
+    û1 = diff(u) / dx
+    # dataset[1] and smoothu are almost equal(rounding errors)
+    return û, time, smoothu, û1
+end
+
+# need to do this for all datasets
+c = [linear(prob.u0, p, t) for t in dataset3[2]] #ideal case
+b = calculate_derivatives1(dataset2) #central diffs
+# a = calculate_derivatives2(dataset) #tvdiff(smoothu, 100, 0.1, dx = dx)
+d = calculate_derivatives2(dataset1) #tvdiff(smoothu, 20, 0.035, dx = dx, ε = 1e-2)
+d = calculate_derivatives2(dataset2)
+d = calculate_derivatives2(dataset3)
+mean(abs2.(c .- b))
+mean(abs2.(c .- d[1]))
+loss(model, x, y) = mean(abs2.(model(x) .- y));
+scatter!(prob.u0 .+ (prob.tspan[2] .- dataset3[2]) .* chainflux1(dataset3[2]')')
+loss(chainflux1, dataset3[2]', dataset3[1]')
+# mean(abs2.(c[1:24] .- a[4]))
+plot(c, label = "ideal deriv")
+plot!(b, label = "Centraldiff deriv")
+# plot!(a[1], label = "tvdiff(0.1,def) derivatives") 
+plot!(d[1], label = "tvdiff(0.035,20) derivatives")
+plotly()
+
+# GridTraining , NoiseRobustDiff dataset[2][2]-dataset[2][1] l2std
+# 25 points 
+ta = range(tspan[1], tspan[2], length = 25)
+u = [linear_analytic(u0, p, ti) for ti in ta]
+x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+time = vec(collect(Float64, ta))
+dataset = [x̂, time]
+physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+time1 = collect(tspan[1]:(1 / 50.0):tspan[2])
+physsol = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
+plot(physsol, label = "solution")
+
+# plots from 32(deriv)
+# for d
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+    draw_samples = 2000, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+n2_sol2flux1 = solve(prob, alg)
+n2_sol2flux1.estimated_ode_params[1]
+# with extra likelihood 
+# 10.2011Particles{Float64, 1}
+
+# without extra likelihood 
+# 6.25791Particles{Float64, 1}
+# 6.29539Particles{Float64, 1}
+
+plot!(n2_sol2flux1.ensemblesol[1], label = "tvdiff(0.035,1) derivpar")
+plot(dataset[1])
+plot!(physsol1)
+# for a
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+    draw_samples = 2000, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+n2_sol2flux2 = solve(prob, alg)
+n2_sol2flux2.estimated_ode_params[1]
+# with extra likelihood
+# 8.73602Particles{Float64, 1}
+# without extra likelihood
+
+plot!(n2_sol2flux2.ensemblesol[1],
+    label = "tvdiff(0.1,def) derivatives",
+    legend = :outerbottomleft)
+
+# for b
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+    draw_samples = 2000, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+n2_sol2flux3 = solve(prob, alg)
+n2_sol2flux3.estimated_ode_params[1]
+plot!(n2_sol2flux3.ensemblesol[1], label = "Centraldiff deriv")
+
+# for c
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+    draw_samples = 2000, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+n2_sol2flux4 = solve(prob, alg)
+n2_sol2flux4.estimated_ode_params[1]
+plot!(n2_sol2flux4.ensemblesol[1], label = "ideal deriv")
+
+# 50 points 
+
+ta = range(tspan[1], tspan[2], length = 50)
+u = [linear_analytic(u0, p, ti) for ti in ta]
+x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+time = vec(collect(Float64, ta))
+dataset = [x̂, time]
+physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+n2_sol2flux11 = solve(prob, alg)
+n2_sol2flux11.estimated_ode_params[1]
+
+# 5.90049Particles{Float64, 1}
+# 100 points
+ta = range(tspan[1], tspan[2], length = 100)
+u = [linear_analytic(u0, p, ti) for ti in ta]
+x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+time = vec(collect(Float64, ta))
+dataset = [x̂, time]
+physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+n2_sol2flux111 = solve(prob, alg)
+n2_sol2flux111.estimated_ode_params[1]
+plot!(n2_sol2flux111.ensemblesol[1])
+8.88555Particles{Float64, 1}
+
+# 7.15353Particles{Float64, 1}
+# 6.21059 Particles{Float64, 1}
+# 6.31836Particles{Float64, 1}
+0.1 * p
+# ----------------------------------------------------------
+
+# Gives the linear interpolation value at t=3.5
+
+# # Problem 1 with param esimation
+# # dataset 0-1 2 percent noise
+# p = 6.283185307179586
+# # partial_logdensity
+# 6.3549Particles{Float64, 1}
+# # full log_density
+# 6.34667Particles{Float64, 1}
+
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2lux.estimated_ode_params[1]
+
+# # dataset 0-1 20 percent noise
+# # partial log_density
+# 6.30244Particles{Float64, 1}
+# # full log_density
+# 6.24637Particles{Float64, 1}
+
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # dataset 0-2 20percent noise
+# # partial log_density
+# 6.24948Particles{Float64, 1}
+# # full log_density
+# 6.26095Particles{Float64, 1}
+
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# linear_analytic = (u0, p, t) -> u0 + sin(p * t) / (p)
+# linear = (u, p, t) -> cos(p * t)
+# tspan = (0.0, 2.0)
+
+# # dataset 0-1 2 percent noise
+# p = 6.283185307179586
+# # partial_logdensity
+# 6.3549Particles{Float64, 1}
+# # full log_density
+# 6.34667Particles{Float64, 1}
+
+# # dataset 0-1 20 percent noise
+# # partial log_density
+# 6.30244Particles{Float64, 1}
+# # full log_density
+# 6.24637Particles{Float64, 1}
+
+# # dataset 0-2 20percent noise
+# # partial log_density
+# 6.24948Particles{Float64, 1}
+# # full log_density
+# 6.26095Particles{Float64, 1}
+
+# # dataset 0-2 20percent noise 50 points(above all are 100 points)
+# # FuLL log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # partial log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# # i kinda win on 25 points again
+# # dataset 0-2 20percent noise 25 points
+# # FuLL log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # partial log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # i win with 25 points
+# # dataset 0-1 20percent noise 25 points
+# # FuLL log_density
+# sol2flux.estimated_ode_params[1]
+# # new
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # partial log_density
+# sol2flux.estimated_ode_params[1]
+# # New
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # (9,2.5)(above are (9,0.5))
+# # FuLL log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# # just prev was repeat(just change)
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # partial log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # i lose on 0-1,50 points
+# # dataset 0-1 20percent noise 50 points
+# # FuLL log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# # partial log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # (9,2.5) (above are (9,0.5))
+# # FuLL log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # partial log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # ----------------------------------------------------------
+# # Problem 1 with param estimation
+# # physdt=1/20, Full likelihood new 0.5*l2std
+# # 25 points
+# ta = range(tspan[1], tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n05_sol2flux1 = solve(prob, alg)
+# n05_sol2flux1.estimated_ode_params[1] #6.90953 Particles{Float64, 1}
+# n05_sol2flux2 = solve(prob, alg)
+# n05_sol2flux2.estimated_ode_params[1] #6.82374 Particles{Float64, 1}
+# n05_sol2flux3 = solve(prob, alg)
+# n05_sol2flux3.estimated_ode_params[1] #6.84465 Particles{Float64, 1}
+
+# using Plots, StatsPlots
+# plot(n05_sol2flux3.ensemblesol[1])
+# plot!(physsol1)
+# # 50 points
+# ta = range(tspan[1], tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n05_sol2flux11 = solve(prob, alg)
+# n05_sol2flux11.estimated_ode_params[1] #7.0262 Particles{Float64, 1}
+# n05_sol2flux22 = solve(prob, alg)
+# n05_sol2flux22.estimated_ode_params[1] #5.56438 Particles{Float64, 1}
+# n05_sol2flux33 = solve(prob, alg)
+# n05_sol2flux33.estimated_ode_params[1] #7.27189 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n05_sol2flux111 = solve(prob, alg)
+# n05_sol2flux111.estimated_ode_params[1] #6.90549 Particles{Float64, 1}
+# n05_sol2flux222 = solve(prob, alg)
+# n05_sol2flux222.estimated_ode_params[1] #5.42436 Particles{Float64, 1}
+# n05_sol2flux333 = solve(prob, alg)
+# n05_sol2flux333.estimated_ode_params[1] #6.05832 Particles{Float64, 1}
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood new 2*l2std
+# # 25 points
+# ta = range(tspan[1], tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n2_sol2flux1 = solve(prob, alg)
+# n2_sol2flux1.estimated_ode_params[1]#6.9087 Particles{Float64, 1}
+# n2_sol2flux2 = solve(prob, alg)
+# n2_sol2flux2.estimated_ode_params[1]#6.86507 Particles{Float64, 1}
+# n2_sol2flux3 = solve(prob, alg)
+# n2_sol2flux3.estimated_ode_params[1]#6.59206 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(tspan[1], tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n2_sol2flux11 = solve(prob, alg)
+# n2_sol2flux11.estimated_ode_params[1]#7.3715 Particles{Float64, 1}
+# n2_sol2flux22 = solve(prob, alg)
+# n2_sol2flux22.estimated_ode_params[1]#9.84477 Particles{Float64, 1}
+# n2_sol2flux33 = solve(prob, alg)
+# n2_sol2flux33.estimated_ode_params[1]#6.87107 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n2_sol2flux111 = solve(prob, alg)
+# n2_sol2flux111.estimated_ode_params[1]#6.60739 Particles{Float64, 1}
+# n2_sol2flux222 = solve(prob, alg)
+# n2_sol2flux222.estimated_ode_params[1]#7.05923 Particles{Float64, 1}
+# n2_sol2flux333 = solve(prob, alg)
+# n2_sol2flux333.estimated_ode_params[1]#6.5017 Particles{Float64, 1}
+
+# # ----------------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood new all 2*l2std
+# # 25 points
+# ta = range(tspan[1], tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n2all5sol2flux1 = solve(prob, alg)
+# n2all5sol2flux1.estimated_ode_params[1]#11.3659 Particles{Float64, 1}
+# n2all5sol2flux2 = solve(prob, alg)
+# n2all5sol2flux2.estimated_ode_params[1]#6.65634 Particles{Float64, 1}
+# n2all5sol2flux3 = solve(prob, alg)
+# n2all5sol2flux3.estimated_ode_params[1]#6.61905 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(tspan[1], tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n2all5sol2flux11 = solve(prob, alg)
+# n2all5sol2flux11.estimated_ode_params[1]#6.27555 Particles{Float64, 1}
+# n2all5sol2flux22 = solve(prob, alg)
+# n2all5sol2flux22.estimated_ode_params[1]#6.24352 Particles{Float64, 1}
+# n2all5sol2flux33 = solve(prob, alg)
+# n2all5sol2flux33.estimated_ode_params[1]#6.33723 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n2all5sol2flux111 = solve(prob, alg)
+# n2all5sol2flux111.estimated_ode_params[1] #5.95535 Particles{Float64, 1}
+# n2all5sol2flux222 = solve(prob, alg)
+# n2all5sol2flux222.estimated_ode_params[1] #5.98301 Particles{Float64, 1}
+# n2all5sol2flux333 = solve(prob, alg)
+# n2all5sol2flux333.estimated_ode_params[1] #5.9081 Particles{Float64, 1}
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood new all (l2+l22)
+# # 25 points
+# ta = range(tspan[1], tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# nall5sol2flux1 = solve(prob, alg)
+# nall5sol2flux1.estimated_ode_params[1]#6.54705 Particles{Float64, 1}
+# nall5sol2flux2 = solve(prob, alg)
+# nall5sol2flux2.estimated_ode_params[1]#6.6967 Particles{Float64, 1}
+# nall5sol2flux3 = solve(prob, alg)
+# nall5sol2flux3.estimated_ode_params[1]#6.47173 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(tspan[1], tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# nall5sol2flux11 = solve(prob, alg)
+# nall5sol2flux11.estimated_ode_params[1]#6.2113 Particles{Float64, 1}
+# nall5sol2flux22 = solve(prob, alg)
+# nall5sol2flux22.estimated_ode_params[1]#6.10675 Particles{Float64, 1}
+# nall5sol2flux33 = solve(prob, alg)
+# nall5sol2flux33.estimated_ode_params[1]#6.11541 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# nall5sol2flux111 = solve(prob, alg)
+# nall5sol2flux111.estimated_ode_params[1]#6.35224 Particles{Float64, 1}
+# nall5sol2flux222 = solve(prob, alg)
+# nall5sol2flux222.estimated_ode_params[1]#6.40542 Particles{Float64, 1}
+# nall5sol2flux333 = solve(prob, alg)
+# nall5sol2flux333.estimated_ode_params[1]#6.44206 Particles{Float64, 1}
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood new 5* (new only l22 mod)
+# # 25 points
+# ta = range(tspan[1], tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n5sol2flux1 = solve(prob, alg)
+# n5sol2flux1.estimated_ode_params[1]#7.05077 Particles{Float64, 1}
+# n5sol2flux2 = solve(prob, alg)
+# n5sol2flux2.estimated_ode_params[1]#7.07303 Particles{Float64, 1}
+# n5sol2flux3 = solve(prob, alg)
+# n5sol2flux3.estimated_ode_params[1]#5.10622 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(tspan[1], tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n5sol2flux11 = solve(prob, alg)
+# n5sol2flux11.estimated_ode_params[1]#7.39852 Particles{Float64, 1}
+# n5sol2flux22 = solve(prob, alg)
+# n5sol2flux22.estimated_ode_params[1]#7.30319 Particles{Float64, 1}
+# n5sol2flux33 = solve(prob, alg)
+# n5sol2flux33.estimated_ode_params[1]#6.73722 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n5sol2flux111 = solve(prob, alg)
+# n5sol2flux111.estimated_ode_params[1]#7.15996 Particles{Float64, 1}
+# n5sol2flux222 = solve(prob, alg)
+# n5sol2flux222.estimated_ode_params[1]#7.02949 Particles{Float64, 1}
+# n5sol2flux333 = solve(prob, alg)
+# n5sol2flux333.estimated_ode_params[1]#6.9393 Particles{Float64, 1}
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood new
+# # 25 points
+# ta = range(tspan[1], tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# nsol2flux1 = solve(prob, alg)
+# nsol2flux1.estimated_ode_params[1] #5.82707 Particles{Float64, 1}
+# nsol2flux2 = solve(prob, alg)
+# nsol2flux2.estimated_ode_params[1] #4.81534 Particles{Float64, 1}
+# nsol2flux3 = solve(prob, alg)
+# nsol2flux3.estimated_ode_params[1] #5.52965 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(tspan[1], tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# nsol2flux11 = solve(prob, alg)
+# nsol2flux11.estimated_ode_params[1] #7.04027 Particles{Float64, 1}
+# nsol2flux22 = solve(prob, alg)
+# nsol2flux22.estimated_ode_params[1] #7.17588 Particles{Float64, 1}
+# nsol2flux33 = solve(prob, alg)
+# nsol2flux33.estimated_ode_params[1] #6.94495 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# nsol2flux111 = solve(prob, alg)
+# nsol2flux111.estimated_ode_params[1] #6.06608 Particles{Float64, 1}
+# nsol2flux222 = solve(prob, alg)
+# nsol2flux222.estimated_ode_params[1] #6.84726 Particles{Float64, 1}
+# nsol2flux333 = solve(prob, alg)
+# nsol2flux333.estimated_ode_params[1] #6.83463 Particles{Float64, 1}
+
+# # ----------------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood
+# # 25 points
+# ta = range(tspan[1], tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux1 = solve(prob, alg)
+# sol2flux1.estimated_ode_params[1] #6.71397 Particles{Float64, 1} 6.37604 Particles{Float64, 1}
+# sol2flux2 = solve(prob, alg)
+# sol2flux2.estimated_ode_params[1] #6.73509 Particles{Float64, 1} 6.21692 Particles{Float64, 1}
+# sol2flux3 = solve(prob, alg)
+# sol2flux3.estimated_ode_params[1] #6.65453 Particles{Float64, 1} 6.23153 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(tspan[1], tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux11 = solve(prob, alg)
+# sol2flux11.estimated_ode_params[1] #6.23443 Particles{Float64, 1} 6.30635 Particles{Float64, 1}
+# sol2flux22 = solve(prob, alg)
+# sol2flux22.estimated_ode_params[1] #6.18879 Particles{Float64, 1} 6.30099 Particles{Float64, 1}
+# sol2flux33 = solve(prob, alg)
+# sol2flux33.estimated_ode_params[1] #6.22773 Particles{Float64, 1} 6.30671 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux111 = solve(prob, alg)
+# sol2flux111.estimated_ode_params[1] #6.15832 Particles{Float64, 1} 6.35453 Particles{Float64, 1}
+# sol2flux222 = solve(prob, alg)
+# sol2flux222.estimated_ode_params[1] #6.16968 Particles{Float64, 1}6.31125 Particles{Float64, 1}
+# sol2flux333 = solve(prob, alg)
+# sol2flux333.estimated_ode_params[1] #6.12466 Particles{Float64, 1} 6.26514 Particles{Float64, 1}
+
+# # ----------------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, partial likelihood
+# # 25 points
+# ta = range(tspan[1], tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux1_p = solve(prob, alg)
+# sol2flux1_p.estimated_ode_params[1] #5.74065 Particles{Float64, 1} #6.83683 Particles{Float64, 1}
+# sol2flux2_p = solve(prob, alg)
+# sol2flux2_p.estimated_ode_params[1] #9.82504 Particles{Float64, 1} #6.14568 Particles{Float64, 1}
+# sol2flux3_p = solve(prob, alg)
+# sol2flux3_p.estimated_ode_params[1] #5.75075 Particles{Float64, 1} #6.08579 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(tspan[1], tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux11_p = solve(prob, alg)
+# sol2flux11_p.estimated_ode_params[1] #6.19414 Particles{Float64, 1} #6.04621 Particles{Float64, 1}
+# sol2flux22_p = solve(prob, alg)
+# sol2flux22_p.estimated_ode_params[1] #6.15227 Particles{Float64, 1} #6.29086 Particles{Float64, 1}
+# sol2flux33_p = solve(prob, alg)
+# sol2flux33_p.estimated_ode_params[1] #6.19048 Particles{Float64, 1} #6.12516 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux111_p = solve(prob, alg)
+# sol2flux111_p.estimated_ode_params[1] #6.51608 Particles{Float64, 1}# 6.42945Particles{Float64, 1}
+# sol2flux222_p = solve(prob, alg)
+# sol2flux222_p.estimated_ode_params[1] #6.4875 Particles{Float64, 1} # 6.44524Particles{Float64, 1}
+# sol2flux333_p = solve(prob, alg)
+# sol2flux333_p.estimated_ode_params[1] #6.51679 Particles{Float64, 1}# 6.43152Particles{Float64, 1}
+
+# # ---------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood, dataset(1.0-2.0)
+# # 25 points
+# ta = range(1.0, tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1flux1 = solve(prob, alg)
+# sol1flux1.estimated_ode_params[1] #6.35164 Particles{Float64, 1}
+# sol1flux2 = solve(prob, alg)
+# sol1flux2.estimated_ode_params[1] #6.30919 Particles{Float64, 1}
+# sol1flux3 = solve(prob, alg)
+# sol1flux3.estimated_ode_params[1] #6.33554 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(1.0, tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1flux11 = solve(prob, alg)
+# sol1flux11.estimated_ode_params[1] #6.39769 Particles{Float64, 1}
+# sol1flux22 = solve(prob, alg)
+# sol1flux22.estimated_ode_params[1] #6.43924 Particles{Float64, 1}
+# sol1flux33 = solve(prob, alg)
+# sol1flux33.estimated_ode_params[1] #6.4697 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(1.0, tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1flux111 = solve(prob, alg)
+# sol1flux111.estimated_ode_params[1] #6.27812 Particles{Float64, 1}
+# sol1flux222 = solve(prob, alg)
+# sol1flux222.estimated_ode_params[1] #6.19278 Particles{Float64, 1}
+# sol1flux333 = solve(prob, alg)
+# sol1flux333.estimated_ode_params[1] # 9.68244Particles{Float64, 1} (first try) # 6.23969 Particles{Float64, 1}(second try)
+
+# # ----------------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, partial likelihood, dataset(1.0-2.0)
+# # 25 points
+# ta = range(1.0, tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1flux1_p = solve(prob, alg)
+# sol1flux1_p.estimated_ode_params[1]#6.36269 Particles{Float64, 1}
+
+# sol1flux2_p = solve(prob, alg)
+# sol1flux2_p.estimated_ode_params[1]#6.34685 Particles{Float64, 1}
+
+# sol1flux3_p = solve(prob, alg)
+# sol1flux3_p.estimated_ode_params[1]#6.31421 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(1.0, tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1flux11_p = solve(prob, alg)
+# sol1flux11_p.estimated_ode_params[1] #6.15725 Particles{Float64, 1}
+
+# sol1flux22_p = solve(prob, alg)
+# sol1flux22_p.estimated_ode_params[1] #6.18145 Particles{Float64, 1}
+
+# sol1flux33_p = solve(prob, alg)
+# sol1flux33_p.estimated_ode_params[1] #6.21905 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(1.0, tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1flux111_p = solve(prob, alg)
+# sol1flux111_p.estimated_ode_params[1]#6.13481 Particles{Float64, 1}
+
+# sol1flux222_p = solve(prob, alg)
+# sol1flux222_p.estimated_ode_params[1]#9.68555 Particles{Float64, 1}
+
+# sol1flux333_p = solve(prob, alg)
+# sol1flux333_p.estimated_ode_params[1]#6.1477 Particles{Float64, 1}
+
+# # -----------------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, partial likelihood, dataset(1-2), again but different density
+# # 12 points
+# ta = range(1.0, tspan[2], length = 12)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol3flux1_p = solve(prob, alg)
+# sol3flux1_p.estimated_ode_params[1]#6.50048 Particles{Float64, 1}
+# sol3flux2_p = solve(prob, alg)
+# sol3flux2_p.estimated_ode_params[1]#6.57597 Particles{Float64, 1}
+# sol3flux3_p = solve(prob, alg)
+# sol3flux3_p.estimated_ode_params[1]#6.24487 Particles{Float64, 1}
+
+# # 25 points
+# ta = range(1.0, tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol3flux11_p = solve(prob, alg)
+# sol3flux11_p.estimated_ode_params[1]#6.53093 Particles{Float64, 1}
+
+# sol3flux22_p = solve(prob, alg)
+# sol3flux22_p.estimated_ode_params[1]#6.32744 Particles{Float64, 1}
+
+# sol3flux33_p = solve(prob, alg)
+# sol3flux33_p.estimated_ode_params[1]#6.49175 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(1.0, tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol3flux111_p = solve(prob, alg)
+# sol3flux111_p.estimated_ode_params[1]#6.4455 Particles{Float64, 1}
+# sol3flux222_p = solve(prob, alg)
+# sol3flux222_p.estimated_ode_params[1]#6.40736 Particles{Float64, 1}
+# sol3flux333_p = solve(prob, alg)
+# sol3flux333_p.estimated_ode_params[1]#6.46214 Particles{Float64, 1}
+
+# # ---------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, partial likelihood, dataset(0-1)
+# # 25 points
+# ta = range(tspan[1], 1.0, length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol0flux1_p = solve(prob, alg)
+# sol0flux1_p.estimated_ode_params[1]#7.12625 Particles{Float64, 1}
+# sol0flux2_p = solve(prob, alg)
+# sol0flux2_p.estimated_ode_params[1]#8.40948 Particles{Float64, 1}
+# sol0flux3_p = solve(prob, alg)
+# sol0flux3_p.estimated_ode_params[1]#7.18768 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(tspan[1], 1.0, length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol0flux11_p = solve(prob, alg)
+# sol0flux11_p.estimated_ode_params[1]#6.23707 Particles{Float64, 1}
+# sol0flux22_p = solve(prob, alg)
+# sol0flux22_p.estimated_ode_params[1]#6.09728 Particles{Float64, 1}
+# sol0flux33_p = solve(prob, alg)
+# sol0flux33_p.estimated_ode_params[1]#6.12971 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], 1.0, length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol0flux111_p = solve(prob, alg)
+# sol0flux111_p.estimated_ode_params[1]#5.99039 Particles{Float64, 1}
+# sol0flux222_p = solve(prob, alg)
+# sol0flux222_p.estimated_ode_params[1]#5.89609 Particles{Float64, 1}
+# sol0flux333_p = solve(prob, alg)
+# sol0flux333_p.estimated_ode_params[1]#5.91923 Particles{Float64, 1}
+
+# # ---------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood, dataset(1.0-2.0), Normal(12,5) distri prior
+# # 25 points
+# ta = range(1.0, tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 6.0),
+#     param = [Normal(12, 5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1f1 = solve(prob, alg)
+# sol1f1.estimated_ode_params[1]
+# # 10.9818Particles{Float64, 1}
+# sol1f2 = solve(prob, alg)
+# sol1f2.estimated_ode_params[1]
+# # sol1f3 = solve(prob, alg)
+# # sol1f3.estimated_ode_params[1]
+
+# # 50 points
+# ta = range(1.0, tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 6.0),
+#     param = [Normal(12, 5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1f11 = solve(prob, alg)
+# sol1f11.estimated_ode_params[1]
+# sol1f22 = solve(prob, alg)
+# sol1f22.estimated_ode_params[1]
+# # sol1f33 = solve(prob, alg)
+# # sol1f33.estimated_ode_params[1]
+
+# # 100 points
+# ta = range(1.0, tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 6.0),
+#     param = [Normal(12, 5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1f111 = solve(prob, alg)
+# sol1f111.estimated_ode_params[1]
+# sol1f222 = solve(prob, alg)
+# sol1f222.estimated_ode_params[1]
+# # sol1f333 = solve(prob, alg)
+# # sol1f333.estimated_ode_params[1]
+
+# # ----------------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, partial likelihood, dataset(1.0-2.0), Normal(12,5) distri prior
+# # 25 points
+# ta = range(1.0, tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [Normal(12, 5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1f1_p = solve(prob, alg)
+# sol1f1_p.estimated_ode_params[1]
+# sol1f2_p = solve(prob, alg)
+# sol1f2_p.estimated_ode_params[1]
+# sol1f3_p = solve(prob, alg)
+# sol1f3_p.estimated_ode_params[1]
+
+# # 50 points
+# ta = range(1.0, tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [Normal(12, 5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1f11_p = solve(prob, alg)
+# sol1f11_p.estimated_ode_params[1]
+# sol1f22_p = solve(prob, alg)
+# sol1f22_p.estimated_ode_params[1]
+# sol1f33_p = solve(prob, alg)
+# sol1f33_p.estimated_ode_params[1]
+
+# # 100 points
+# ta = range(1.0, tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [Normal(12, 5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1f111_p = solve(prob, alg)
+# sol1f111_p.estimated_ode_params[1]
+# sol1f222_p = solve(prob, alg)
+# sol1f222_p.estimated_ode_params[1]
+# sol1f333_p = solve(prob, alg)
+# sol1f333_p.estimated_ode_params[1]
+
+# # ----------------------------------------------------------
+
+# plot!(title = "9,2.5 50 training 2>full,1>partial")
+
+# p
+# param1
+# # (lux chain)
+# @prob mean(abs.(physsol2 .- sol3lux_pestim.ensemblesol[1])) < 8e-2
+
+# # estimated parameters(lux chain)
+# param1 = sol3lux_pestim.estimated_ode_params[1]
+# @test abs(param1 - p) < abs(0.35 * p)
+
+# p
+# param1
+
+# # # my suggested Loss likelihood part
+# # #  + L2loss2(Tar, θ)
+# # # My suggested extra loss function
+# # function L2loss2(Tar::LogTargetDensity, θ)
+# #     f = Tar.prob.f
+
+# #     # parameter estimation chosen or not
+# #     if Tar.extraparams > 0
+# #         dataset = Tar.dataset
+
+# #         # Timepoints to enforce Physics
+# #         dataset = Array(reduce(hcat, dataset)')
+# #         t = dataset[end, :]
+# #         û = dataset[1:(end - 1), :]
+
+# #         ode_params = Tar.extraparams == 1 ?
+# #                      θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
+# #                      θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
+
+# #         if length(û[:, 1]) == 1
+# #             physsol = [f(û[:, i][1],
+# #                 ode_params,
+# #                 t[i])
+# #                        for i in 1:length(û[1, :])]
+# #         else
+# #             physsol = [f(û[:, i],
+# #                 ode_params,
+# #                 t[i])
+# #                        for i in 1:length(û[1, :])]
+# #         end
+# #         #form of NN output matrix output dim x n
+# #         deri_physsol = reduce(hcat, physsol)
+
+# #         #   OG deriv(basically gradient matching in case of an ODEFunction)
+# #         # in case of PDE or general ODE we would want to reduce residue of f(du,u,p,t)
+# #         # if length(û[:, 1]) == 1
+# #         #     deri_sol = [f(û[:, i][1],
+# #         #         Tar.prob.p,
+# #         #         t[i])
+# #         #                 for i in 1:length(û[1, :])]
+# #         # else
+# #         #     deri_sol = [f(û[:, i],
+# #         #         Tar.prob.p,
+# #         #         t[i])
+# #         #                 for i in 1:length(û[1, :])]
+# #         # end
+# #         # deri_sol = reduce(hcat, deri_sol)
+# #         derivatives = calculate_derivatives(Tar.dataset)
+# #         deri_sol = reduce(hcat, derivatives)
+
+# #         physlogprob = 0
+# #         for i in 1:length(Tar.prob.u0)
+# #             # can add phystd[i] for u[i]
+# #             physlogprob += logpdf(MvNormal(deri_physsol[i, :],
+# #                     LinearAlgebra.Diagonal(map(abs2,
+# #                         Tar.l2std[i] .*
+# #                         ones(length(deri_sol[i, :]))))),
+# #                 deri_sol[i, :])
+# #         end
+# #         return physlogprob
+# #     else
+# #         return 0
+# #     end
+# # end
+
+# # function calculate_derivatives(dataset)
+# #     x̂, time = dataset
+# #     num_points = length(x̂)
+
+# #     # Initialize an array to store the derivative values.
+# #     derivatives = similar(x̂)
+
+# #     for i in 2:(num_points - 1)
+# #         # Calculate the first-order derivative using central differences.
+# #         Δt_forward = time[i + 1] - time[i]
+# #         Δt_backward = time[i] - time[i - 1]
+
+# #         derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
+
+# #         derivatives[i] = derivative
+# #     end
+
+# #     # Derivatives at the endpoints can be calculated using forward or backward differences.
+# #     derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
+# #     derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
+
+# #     return derivatives
+# # end
+
+# size(dataset[1])
+# # Problem 1 with param estimation(flux,lux)
+# # Normal
+# # 6.20311 Particles{Float64, 1},6.21746Particles{Float64, 1}
+# # better
+# # 6.29093Particles{Float64, 1}, 6.27925Particles{Float64, 1}
+# # Non ideal case
+# # 6.14861Particles{Float64, 1}, 
+# sol2flux.estimated_ode_params
+# sol2lux.estimated_ode_params[1]
+# p
+# size(sol3flux_pestim.ensemblesol[2])
+# plott = sol3flux_pestim.ensemblesol[1]
+# using StatsPlots
+# plotly()
+# plot(t, sol3flux_pestim.ensemblesol[1])
+
+# function calculate_derivatives(dataset)
+#     x̂, time = dataset
+#     num_points = length(x̂)
+
+#     # Initialize an array to store the derivative values.
+#     derivatives = similar(x̂)
+
+#     for i in 2:(num_points - 1)
+#         # Calculate the first-order derivative using central differences.
+#         Δt_forward = time[i + 1] - time[i]
+#         Δt_backward = time[i] - time[i - 1]
+
+#         derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
+
+#         derivatives[i] = derivative
+#     end
+
+#     # Derivatives at the endpoints can be calculated using forward or backward differences.
+#     derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
+#     derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
+
+#     return derivatives
+# end
+
+# # Example usage:
+# # dataset = [x̂, time]
+# derivatives = calculate_derivatives(dataset)
+# dataset[1]
+# # Access derivative values at specific time points as needed.
+
+# # # 9,0.5
+# # 0.09894916260292887
+# # 0.09870335436072103
+# # 0.08398556878067913
+# # 0.10109070099105527
+# # 0.09122683737517055
+# # 0.08614958011892977
+# # mean(abs.(x̂ .- meanscurve1)) #0.017112298305523976
+# # mean(abs.(physsol1 .- meanscurve1)) #0.004038636894341354
+# # # 9,4(little worse)
+# # mean(abs.(x̂ .- meanscurve1))#0.01800876370000113
+# # mean(abs.(physsol1 .- meanscurve1))#0.007285681280600875
+# # # 30,30
+# # mean(abs.(x̂ .- meanscurve1)) #0.10599926120358046
+# # mean(abs.(physsol1 .- meanscurve1)) #0.10375554193397989
+# # # 30,0.5
+# # mean(abs.(x̂ .- meanscurve1)) #0.10160824458252521
+# # mean(abs.(physsol1 .- meanscurve1)) #0.09999942538357891
+
+# # # ------------------------------------------------normale
+# # # 9,0.5
+# # mean(abs.(x̂ .- meanscurve1)) #0.0333356493928835
+# # mean(abs.(physsol1 .- meanscurve1)) #0.02721733876400459
+# # # 9,4(little worse)
+# # mean(abs.(x̂ .- meanscurve1)) #0.020734206709433347
+# # mean(abs.(physsol1 .- meanscurve1)) #0.012502850740700212
+# # # 30,30
+# # mean(abs.(x̂ .- meanscurve1)) #0.10615859683094729
+# # mean(abs.(physsol1 .- meanscurve1)) #0.10508141153722575
+# # # 30,0.5
+# # mean(abs.(x̂ .- meanscurve1)) #0.10833514946031565
+# # mean(abs.(physsol1 .- meanscurve1)) #0.10668470203219232
+
+# # # 9,0.5
+# # 10.158108285475553
+# # 10.207234384538026
+# # 10.215000657664852
+# # 10.213817644016174
+# # 13.380030074088719
+# # 13.348906350967326
+
+# # 6.952731422892041
+
+# # # All losses
+# # 10.161478523326277
+# # # L2 losses 1
+# # 9.33312996960278
+# # # L2 losses 2
+# # 10.217417241370631
+
+# # mean([fhsamples1[i][26] for i in 500:1000]) #6.245045767509431
+# # p #6.283185307179586
+# # # 9,4
+# # mean([fhsamples1[i][23] for i in 500:1000]) #6.212522300650451
+# # # 30,30
+# # mean([fhsamples1[i][23] for i in 500:1000]) #35.328636809737695
+# # # 30,0.5
+# # mean([fhsamples1[i][23] for i in 500:1000]) #35.232963812125654
+
+# # # ---------------------------------------normale
+# # # 9,0.5
+# # mean([fhsamples1[i][23] for i in 500:1000]) #6.547771572198114
+# # p #6.283185307179586
+# # # 9,4
+# # mean([fhsamples1[i][23] for i in 500:1000]) #6.158906185002702
+# # # 30,30
+# # mean([fhsamples1[i][23] for i in 500:1000]) #29.210400972620185
+# # # 30,0.5
+# # mean([fhsamples1[i][23] for i in 500:1000]) #29.153845019454522
+
+# # # ----------------more dataset normale -----------------------------
+# # # 9,0.5
+# # mean([fhsamples1[i][23] for i in 500:1000]) #6.271141178216537
+# # p #6.283185307179586
+# # # 9,4
+# # mean([fhsamples1[i][23] for i in 500:1000]) #6.241144692919369
+# # # 30,30
+# # mean([fhsamples1[i][23] for i in 500:1000]) #29.124480447973127
+# # # 30,0.5
+# # mean([fhsamples1[i][23] for i in 500:1000]) #29.07838011629903
+
+# # # 9,0.5
+# # mean(abs.(x̂ .- meanscurve1)) #0.016551602015599295
+# # mean(abs.(physsol1 .- meanscurve1)) #0.0021488618484224245
+# # # 9,4(little worse)
+# # mean(abs.(x̂ .- meanscurve1)) #0.017022725082640747
+# # mean(abs.(physsol1 .- meanscurve1)) #0.004339761917100232
+# # # 30,30
+# # mean(abs.(x̂ .- meanscurve1)) #0.09668785317864312
+# # mean(abs.(physsol1 .- meanscurve1)) #0.09430712337543362
+# # # 30,0.5
+# # mean(abs.(x̂ .- meanscurve1)) #0.09958118358974392
+# # mean(abs.(physsol1 .- meanscurve1)) #0.09717454226368502
+
+# # # ----------------more dataset special -----------------------------
+# # # 9,0.5
+# # mean([fhsamples1[i][23] for i in 500:1000]) #6.284355334485365
+# # p #6.283185307179586
+# # # 9,4
+# # mean([fhsamples1[i][23] for i in 500:1000]) #6.259238106698602
+# # # 30,30
+# # mean([fhsamples1[i][23] for i in 500:1000]) #29.139808934336987
+# # # 30,0.5
+# # mean([fhsamples1[i][23] for i in 500:1000]) #29.03921327641226
+
+# # # 9,0.5
+# # mean(abs.(x̂ .- meanscurve1)) #0.016627231605546876
+# # mean(abs.(physsol1 .- meanscurve1)) #0.0020311429130039564
+# # # 9,4(little worse)
+# # mean(abs.(x̂ .- meanscurve1)) #0.016650324577507352
+# # mean(abs.(physsol1 .- meanscurve1)) #0.0027537543411154677
+# # # 30,30
+# # mean(abs.(x̂ .- meanscurve1)) #0.09713187937270151
+# # mean(abs.(physsol1 .- meanscurve1)) #0.09317278450371556
+# # # 30,0.5
+# # mean(abs.(x̂ .- meanscurve1)) #0.09550234866855814
+# # mean(abs.(physsol1 .- meanscurve1)) #0.09317278450371556
+
+# # using Plots, StatsPlots
+# # plotly()
+
+# # ---------------------------------------------------------
+# # # # Distribution abstract in wrapper, dataset Float64
+# # # 268.651 s (206393690 allocations: 388.71 GiB)
+# # # 318.170551 seconds (206.29 M allocations: 388.453 GiB, 20.83% gc time)
+
+# # # # Above with dataset Real subtype
+# # # 326.201 s (206327409 allocations: 388.42 GiB)
+# # # 363.189370 seconds (206.25 M allocations: 387.975 GiB, 15.77% gc time)
+# # # 306.171 s (206321277 allocations: 388.55 GiB)
+# # # 356.180699 seconds (206.43 M allocations: 388.361 GiB, 13.77% gc time)
+
+# # # # Above with dataset AbstractFloat subtype
+# # # 290.751187 seconds (205.94 M allocations: 387.955 GiB, 12.92% gc time)
+# # # 296.319815 seconds (206.38 M allocations: 388.730 GiB, 12.69% gc time)
+
+# # # # ODEProblem float64 dtaset and vector distri inside
+# # #   273.169 s (206128318 allocations: 388.40 GiB)
+# # #   274.059531 seconds (205.91 M allocations: 387.953 GiB, 12.77% gc time)
+
+# # # #   Dataset float64 inside and vector distri outsude
+# # #   333.603 s (206251143 allocations: 388.41 GiB)
+# # # 373.377222 seconds (206.11 M allocations: 387.968 GiB, 13.25% gc time)
+# # #   359.745 s (206348301 allocations: 388.41 GiB)
+# # # 357.813114 seconds (206.31 M allocations: 388.354 GiB, 13.54% gc time)
+
+# # # # Dataset float64 inside and vector distri inside
+# # #   326.437 s (206253571 allocations: 388.41 GiB)
+# # #   290.334083 seconds (205.92 M allocations: 387.954 GiB, 13.82% gc time)
+
+# # # # current setting
+# # # 451.304 s (206476927 allocations: 388.43 GiB)
+# # # 384.532732 seconds (206.22 M allocations: 387.976 GiB, 13.17% gc time)
+# # # 310.223 s (206332558 allocations: 388.63 GiB)
+# # # 344.243889 seconds (206.34 M allocations: 388.409 GiB, 13.84% gc time)
+# # # 357.457737 seconds (206.66 M allocations: 389.064 GiB, 18.16% gc time)
+
+# # # # shit setup
+# # #   325.595 s (206283732 allocations: 388.41 GiB)
+# # # 334.248753 seconds (206.06 M allocations: 387.964 GiB, 12.60% gc time)
+# # #   326.011 s (206370857 allocations: 388.56 GiB)
+# # # 327.203339 seconds (206.29 M allocations: 388.405 GiB, 12.92% gc time)
+
+# # # # in wrapper Distribution prior, insiade FLOAT64 DATASET
+# # # 325.158167 seconds (205.97 M allocations: 387.958 GiB, 15.07% gc time) 
+# # #   429.536 s (206476324 allocations: 388.43 GiB)
+# # #   527.364 s (206740343 allocations: 388.58 GiB)
+
+# # # #   wrapper Distribtuion, inside Float64
+# # # 326.017 s (206037971 allocations: 387.96 GiB)
+# # # 347.424730 seconds (206.45 M allocations: 388.532 GiB, 12.92% gc time)
+
+# # # 439.047568 seconds (284.24 M allocations: 392.598 GiB, 15.25% gc time, 14.36% compilation time: 0% of which was recompilation)
+# # # 375.472142 seconds (206.40 M allocations: 388.529 GiB, 14.93% gc time)
+# # # 374.888820 seconds (206.34 M allocations: 388.346 GiB, 14.09% gc time)
+# # # 363.719611 seconds (206.39 M allocations: 388.581 GiB, 15.08% gc time)
+# # # # inside Distribtion, instide Float64
+# # #   310.238 s (206324249 allocations: 388.53 GiB)
+# # #   308.991494 seconds (206.34 M allocations: 388.549 GiB, 14.01% gc time)
+# # #   337.442 s (206280712 allocations: 388.36 GiB)
+# # #   299.983096 seconds (206.29 M allocations: 388.512 GiB, 17.14% gc time)
+
+# # #   394.924357 seconds (206.27 M allocations: 388.337 GiB, 23.68% gc time)
+# # # 438.204179 seconds (206.39 M allocations: 388.470 GiB, 23.84% gc time)
+# # #   376.626914 seconds (206.46 M allocations: 388.693 GiB, 18.72% gc time)
+# # # 286.863795 seconds (206.14 M allocations: 388.370 GiB, 18.80% gc time)
+# # #   285.556929 seconds (206.22 M allocations: 388.371 GiB, 17.04% gc time)
+# # #   291.471662 seconds (205.96 M allocations: 388.068 GiB, 19.85% gc time)
+
+# # # 495.814341 seconds (284.62 M allocations: 392.622 GiB, 12.56% gc time, 10.96% compilation time: 0% of which was recompilation)
+# # # 361.530617 seconds (206.36 M allocations: 388.526 GiB, 14.98% gc time)
+# # # 348.576065 seconds (206.22 M allocations: 388.337 GiB, 15.01% gc time)
+# # # 374.575609 seconds (206.45 M allocations: 388.586 GiB, 14.65% gc time)
+# # # 314.223008 seconds (206.23 M allocations: 388.411 GiB, 14.63% gc time)
+
+# # PROBLEM-3 LOTKA VOLTERRA EXAMPLE [WIP] (WITH PARAMETER ESTIMATION)(will be put in tutorial page)
+# function lotka_volterra(u, p, t)
+#     # Model parameters.
+#     α, β, γ, δ = p
+#     # Current state.
+#     x, y = u
+
+#     # Evaluate differential equations.
+#     dx = (α - β * y) * x # prey
+#     dy = (δ * x - γ) * y # predator
+
+#     return [dx, dy]
+# end
+
+# u0 = [1.0, 1.0]
+# p = [1.5, 1.0, 3.0, 1.0]
+# tspan = (0.0, 6.0)
+# prob = ODEProblem(lotka_volterra, u0, tspan, p)
+# solution = solve(prob, Tsit5(); saveat = 0.05)
+
+# as = reduce(hcat, solution.u)
+# as[1, :]
+# # Plot simulation.
+# time = solution.t
+# u = hcat(solution.u...)
+# # BPINN AND TRAINING DATASET CREATION, NN create, Reconstruct
+# x = u[1, :] + 0.5 * randn(length(u[1, :]))
+# y = u[2, :] + 0.5 * randn(length(u[1, :]))
+# dataset = [x[1:50], y[1:50], time[1:50]]
+# # scatter!(time, [x, y])
+# # scatter!(dataset[3], [dataset[2], dataset[1]])
+
+# # NN has 2 outputs as u -> [dx,dy]
+# chainlux1 = Lux.Chain(Lux.Dense(1, 6, Lux.tanh), Lux.Dense(6, 6, Lux.tanh),
+#     Lux.Dense(6, 2))
+# chainflux1 = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 6, tanh), Flux.Dense(6, 2))
+
+# # fh_mcmc_chainflux1, fhsamplesflux1, fhstatsflux1 = ahmc_bayesian_pinn_ode(prob, chainflux1,
+# #                                                                           dataset = dataset,
+# #                                                                           draw_samples = 1000,
+# #                                                                           l2std = [
+# #                                                                               0.05,
+# #                                                                               0.05,
+# #                                                                           ],
+# #                                                                           phystd = [
+# #                                                                               0.05,
+# #                                                                               0.05,
+# #                                                                           ],
+# #                                                                           priorsNNw = (0.0,
+# #          
+
+# #   3.0))
+
+# # check if NN output is more than 1
+# # numoutput = size(luxar[1])[1]
+# # if numoutput > 1
+# #     # Initialize a vector to store the separated outputs for each output dimension
+# #     output_matrices = [Vector{Vector{Float32}}() for _ in 1:numoutput]
+
+# #     # Loop through each element in the `as` vector
+# #     for element in as
+# #         for i in 1:numoutput
+# #             push!(output_matrices[i], element[i, :])  # Append the i-th output (i-th row) to the i-th output_matrices
+# #         end
+# #     end
+
+# #     ensemblecurves = Vector{}[]
+# #     for r in 1:numoutput
+# #         br = hcat(output_matrices[r]...)'
+# #         ensemblecurve = prob.u0[r] .+
+# #                         [Particles(br[:, i]) for i in 1:length(t)] .*
+# #                         (t .- prob.tspan[1])
+# #         push!(ensemblecurves, ensemblecurve)
+# #     end
+
+# # else
+# #     # ensemblecurve = prob.u0 .+
+# #     #                 [Particles(reduce(vcat, luxar)[:, i]) for i in 1:length(t)] .*
+# #     #                 (t .- prob.tspan[1])
+# #     print("yuh")
+# # end
+
+# # fhsamplesflux2
+# # nnparams = length(init1)
+# # estimnnparams = [Particles(reduce(hcat, fhsamplesflux2)[i, :]) for i in 1:nnparams]
+# # ninv=4
+# # estimated_params = [Particles(reduce(hcat, fhsamplesflux2[(end - ninv + 1):end])[i, :])
+# #                     for i in (nnparams + 1):(nnparams + ninv)]
+# # output_matrices[r]
+# # br = hcat(output_matrices[r]...)'
+
+# # br[:, 1]
+
+# # [Particles(br[:, i]) for i in 1:length(t)]
+# # prob.u0
+# # [Particles(br[:, i]) for i in 1:length(t)] .*
+# # (t .- prob.tspan[1])
+
+# # ensemblecurve = prob.u0[r] .+
+# #                 [Particles(br[:, i]) for i in 1:length(t)] .*
+# #                 (t .- prob.tspan[1])
+# # push!(ensemblecurves, ensemblecurve)
+
+# using StatsPlots
+# plotly()
+# plot(t, ensemblecurve)
+# plot(t, ensemblecurves[1])
+# plot!(t, ensemblecurves[2])
+# ensemblecurve
+# ensemblecurves[1]
+# fh_mcmc_chainflux2, fhsamplesflux2, fhstatsflux2 = ahmc_bayesian_pinn_ode(prob, chainflux1,
+#     dataset = dataset,
+#     draw_samples = 1000,
+#     l2std = [
+#         0.05,
+#         0.05,
+#     ],
+#     phystd = [
+#         0.05,
+#         0.05,
+#     ],
+#     priorsNNw = (0.0,
+#         3.0),
+#     param = [
+#         Normal(1.5,
+#             0.5),
+#         Normal(1.2,
+#             0.5),
+#         Normal(3.3,
+#             0.5),
+#         Normal(1.4,
+#             0.5),
+#     ], progress = true)
+
+# alg = NeuralPDE.BNNODE(chainflux1,
+#     dataset = dataset,
+#     draw_samples = 1000,
+#     l2std = [
+#         0.05,
+#         0.05,
+#     ],
+#     phystd = [
+#         0.05,
+#         0.05,
+#     ],
+#     priorsNNw = (0.0,
+#         3.0),
+#     param = [
+#         Normal(4.5,
+#             5),
+#         Normal(7,
+#             2),
+#         Normal(5,
+#             2),
+#         Normal(-4,
+#             6),
+#     ],
+#     n_leapfrog = 30, progress = true)
+
+# sol3flux_pestim = solve(prob, alg)
+
+# # OG PARAM VALUES
+# [1.5, 1.0, 3.0, 1.0]
+# # less
+# # [1.34, 7.51, 2.54, -2.55]
+# # better
+# # [1.48, 0.993, 2.77, 0.954]
+
+# sol3flux_pestim.es
+# sol3flux_pestim.estimated_ode_params
+# # fh_mcmc_chainlux1, fhsampleslux1, fhstatslux1 = ahmc_bayesian_pinn_ode(prob, chainlux1,
+# #                                                                        dataset = dataset,
+# #                                                                        draw_samples = 1000,
+# #                                                                        l2std = [0.05, 0.05],
+# #                                                                        phystd = [
+# #                                                                            0.05,
+# #                                                                            0.05,
+# #                                                                        ],
+# #                                                                        priorsNNw = (0.0,
+# #                                                                                     3.0))
+
+# # fh_mcmc_chainlux2, fhsampleslux2, fhstatslux2 = ahmc_bayesian_pinn_ode(prob, chainlux1,
+# #                                                                        dataset = dataset,
+# #                                                                        draw_samples = 1000,
+# #                                                                        l2std = [0.05, 0.05],
+# #                                                                        phystd = [
+# #                                                                            0.05,
+# #                                                                            0.05,
+# #                                                                        ],
+# #                                                                        priorsNNw = (0.0,
+# #                                                                                     3.0),
+# #                                                                        param = [
+# #                                                                            Normal(1.5, 0.5),
+# #                                                                            Normal(1.2, 0.5),
+# #                                                                            Normal(3.3, 0.5),
+# #                                                                            Normal(1.4, 0.5),
+# #                                                                        ])
+
+# init1, re1 = destructure(chainflux1)
+# θinit, st = Lux.setup(Random.default_rng(), chainlux1)
+# #   PLOT testing points
+# t = time
+# p = prob.p
+# collect(Float64, vcat(ComponentArrays.ComponentArray(θinit)))
+# collect(Float64, ComponentArrays.ComponentArray(θinit))
+# # Mean of last 1000 sampled parameter's curves(flux and lux chains)[Ensemble predictions]
+# out = re1.([fhsamplesflux1[i][1:68] for i in 500:1000])
+# yu = [out[i](t') for i in eachindex(out)]
+
+# function getensemble(yu, num_models)
+#     num_rows, num_cols = size(yu[1])
+#     row_means = zeros(Float32, num_rows, num_cols)
+#     for i in 1:num_models
+#         row_means .+= yu[i]
+#     end
+#     row_means ./ num_models
+# end
+# fluxmean = getensemble(yu, length(out))
+# meanscurve1_1 = prob.u0 .+ (t' .- prob.tspan[1]) .* fluxmean
+# mean(abs.(u .- meanscurve1_1))
+
+# plot!(t, physsol1)
+# @test mean(abs2.(x̂ .- meanscurve1_1)) < 2e-2
+# @test mean(abs2.(physsol1 .- meanscurve1)) < 2e-2
+# @test mean(abs2.(x̂ .- meanscurve2)) < 3e-3
+# @test mean(abs2.(physsol1 .- meanscurve2)) < 2e-3
+
+# out = re1.([fhsamplesflux2[i][1:68] for i in 500:1000])
+# yu = collect(out[i](t') for i in eachindex(out))
+# fluxmean = getensemble(yu, length(out))
+# meanscurve1_2 = prob.u0 .+ (t' .- prob.tspan[1]) .* fluxmean
+# mean(abs.(u .- meanscurve1_2))
+
+# @test mean(abs2.(x̂ .- meanscurve1)) < 2e-2
+# @test mean(abs2.(physsol1 .- meanscurve1)) < 2e-2
+# @test mean(abs2.(x̂ .- meanscurve2)) < 3e-3
+# @test mean(abs2.(physsol1 .- meanscurve2)) < 2e-3
+
+# θ = [vector_to_parameters(fhsampleslux1[i][1:(end - 4)], θinit) for i in 500:1000]
+# luxar = [chainlux1(t', θ[i], st)[1] for i in 1:500]
+# luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
+# meanscurve2_1 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
+
+# @test mean(abs2.(x̂ .- meanscurve1)) < 2e-2
+# @test mean(abs2.(physsol1 .- meanscurve1)) < 2e-2
+# @test mean(abs2.(x̂ .- meanscurve2)) < 3e-3
+# @test mean(abs2.(physsol1 .- meanscurve2)) < 2e-3
+
+# θ = [vector_to_parameters(fhsampleslux2[i][1:(end - 4)], θinit) for i in 500:1000]
+# luxar = [chainlux1(t', θ[i], st)[1] for i in 1:500]
+# luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
+# meanscurve2_2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
+
+# @test mean(abs2.(x̂ .- meanscurve1)) < 2e-2
+# @test mean(abs2.(physsol1 .- meanscurve1)) < 2e-2
+# @test mean(abs2.(x̂ .- meanscurve2)) < 3e-3
+# @test mean(abs2.(physsol1 .- meanscurve2)) < 2e-3
+
+# # # ESTIMATED ODE PARAMETERS (NN1 AND NN2)
+# @test abs(p - mean([fhsamplesflux2[i][69] for i in 500:1000])) < 0.1 * p[1]
+# @test abs(p - mean([fhsampleslux2[i][69] for i in 500:1000])) < 0.2 * p[1]
+
+# # @test abs(p - mean([fhsamplesflux2[i][70] for i in 500:1000])) < 0.1 * p[2]
+# # @test abs(p - mean([fhsampleslux2[i][70] for i in 500:1000])) < 0.2 * p[2]
+
+# # @test abs(p - mean([fhsamplesflux2[i][71] for i in 500:1000])) < 0.1 * p[3]
+# # @test abs(p - mean([fhsampleslux2[i][71] for i in 500:1000])) < 0.2 * p[3]
+
+# # @test abs(p - mean([fhsamplesflux2[i][72] for i in 500:1000])) < 0.1 * p[4]
+# # @test abs(p - mean([fhsampleslux2[i][72] for i in 500:1000])) < 0.2 * p[4]
+
+# # fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chainlux1,
+# #                                                               dataset = dataset,
+# #                                                               draw_samples = 1000,
+# #                                                               l2std = [0.05, 0.05],
+# #                                                               phystd = [0.05, 0.05],
+# #                                                               priorsNNw = (0.0, 3.0),
+# #                                                               param = [
+# #                                                                   Normal(1.5, 0.5),
+# #                                                                   Normal(1.2, 0.5),
+# #                                                                   Normal(3.3, 0.5),
+# #                                                                   Normal(1.4, 0.5),
+# #                                                               ], autodiff = true)
+
+# # fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chainlux1,
+# #                                                               dataset = dataset,
+# #                                                               draw_samples = 1000,
+# #                                                               l2std = [0.05, 0.05],
+# #                                                               phystd = [0.05, 0.05],
+# #                                                               priorsNNw = (0.0, 3.0),
+# #                                                               param = [
+# #                                                                   Normal(1.5, 0.5),
+# #                                                                   Normal(1.2, 0.5),
+# #                                                                   Normal(3.3, 0.5),
+# #                                                                   Normal(1.4, 0.5),
+# #                                                               ], nchains = 2)
+
+# # NOTES (WILL CLEAR LATER)
+# # --------------------------------------------------------------------------------------------
+# # Hamiltonian energy must be lowest(more paramters the better is it to map onto them)
+# # full better than L2 and phy individual(test)
+# # in mergephys more points after training points is better from 20->40
+# # does consecutive runs bceome better? why?(plot 172)(same chain maybe)
+# # does density of points in timespan matter dataset vs internal timespan?(plot 172)(100+0.01)
+# # when training from 0-1 and phys from 1-5 with 1/150 simple nn slow,but bigger nn faster decrease in Hmailtonian
+# # bigger time interval more curves to adapt to only more parameters adapt to that, better NN architecture
+# # higher order logproblems solve better
+# # repl up up are same instances? but reexecute calls are new?
+
+# #Compare results against paper example
+# # Lux chains support (DONE)
+# # fix predictions for odes depending upon 1,p in f(u,p,t)(DONE)
+# # lotka volterra learn curve beyond l2 losses(L2 losses determine accuracy of parameters)(parameters cant run free ∴ L2 interval only)
+# # check if prameters estimation works(YES)
+# # lotka volterra parameters estimate (DONE)
+
+# using NeuralPDE, Lux, Flux, Optimization, OptimizationOptimJL
+# import ModelingToolkit: Interval
+# using Plots, StatsPlots
+# plotly()
+# # Profile.init()
+
+# @parameters x y
+# @variables u(..)
+# Dxx = Differential(x)^2
+# Dyy = Differential(y)^2
+
+# # 2D PDE
+# eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y)
+
+# # Boundary conditions
+# bcs = [u(0, y) ~ 0.0, u(1, y) ~ 0.0,
+#     u(x, 0) ~ 0.0, u(x, 1) ~ 0.0]
+# # Space and time domains
+# domains = [x ∈ Interval(0.0, 1.0),
+#     y ∈ Interval(0.0, 1.0)]
+
+# # Neural network
+# dim = 2 # number of dimensions
+# chain = Flux.Chain(Flux.Dense(dim, 16, Lux.σ), Flux.Dense(16, 16, Lux.σ), Flux.Dense(16, 1))
+# θ, re = destructure(chain)
+# # Discretization
+# dx = 0.05
+# discretization = PhysicsInformedNN(chain, GridTraining(dx))
+
+# @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
+
+# pinnrep = symbolic_discretize(pde_system, discretization)
+# typeof(pinnrep.phi)
+# typeof(pinnrep.phi)
+# typeof(re)
+# pinnrep.phi([1, 2], θ)
+
+# typeof(θ)
+
+# print(pinnrep)
+# pinnrep.eqs
+# pinnrep.bcs
+# pinnrep.domains
+# pinnrep.eq_params
+# pinnrep.defaults
+# print(pinnrep.default_p)
+# pinnrep.param_estim
+# print(pinnrep.additional_loss)
+# pinnrep.adaloss
+# pinnrep.depvars
+# pinnrep.indvars
+# pinnrep.dict_depvar_input
+# pinnrep.dict_depvars
+# pinnrep.dict_indvars
+# print(pinnrep.logger)
+# pinnrep.multioutput
+# pinnrep.iteration
+# pinnrep.init_params
+# pinnrep.flat_init_params
+# pinnrep.phi
+# pinnrep.derivative
+# pinnrep.strategy
+# pinnrep.pde_indvars
+# pinnrep.bc_indvars
+# pinnrep.pde_integration_vars
+# pinnrep.bc_integration_vars
+# pinnrep.integral
+# pinnrep.symbolic_pde_loss_functions
+# pinnrep.symbolic_bc_loss_functions
+# pinnrep.loss_functions
+
+# #  = discretize(pde_system, discretization)
+# prob = symbolic_discretize(pde_system, discretization)
+# # "The boundary condition loss functions"
+# sum([prob.loss_functions.bc_loss_functions[i](θ) for i in eachindex(1:4)])
+# sum([prob.loss_functions.pde_loss_functions[i](θ) for i in eachindex(1)])
+
+# prob.loss_functions.full_loss_function(θ, 32)
+
+# prob.loss_functions.bc_loss_functions[1](θ)
+
+# prob.loss_functions.bc_loss_functions
+# prob.loss_functions.full_loss_function
+# prob.loss_functions.additional_loss_function
+# prob.loss_functions.pde_loss_functions
+
+# 1.3953060473003345 + 1.378102161087438 + 1.395376727128639 + 1.3783868705075002 +
+# 0.22674532775196876
+# # "The PDE loss functions"
+# prob.loss_functions.pde_loss_functions
+# prob.loss_functions.pde_loss_functions[1](θ)
+# # "The full loss function, combining the PDE and boundary condition loss functions.This is the loss function that is used by the optimizer."
+# prob.loss_functions.full_loss_function(θ, nothing)
+# prob.loss_functions.full_loss_function(θ, 423423)
+
+# # "The wrapped `additional_loss`, as pieced together for the optimizer."
+# prob.loss_functions.additional_loss_function
+# # "The pre-data version of the PDE loss function"
+# prob.loss_functions.datafree_pde_loss_functions
+# # "The pre-data version of the BC loss function"
+# prob.loss_functions.datafree_bc_loss_functions
+
+# using Random
+# θ, st = Lux.setup(Random.default_rng(), chain)
+# #Optimizer
+# opt = OptimizationOptimJL.BFGS()
+
+# #Callback function
+# callback = function (p, l)
+#     println("Current loss is: $l")
+#     return false
+# end
+
+# res = Optimization.solve(prob, opt, callback = callback, maxiters = 1000)
+# phi = discretization.phi
+
+# # ------------------------------------------------
+# using NeuralPDE, Lux, ModelingToolkit, Optimization, OptimizationOptimJL, OrdinaryDiffEq,
+#       Plots
+# import ModelingToolkit: Interval, infimum, supremum
+# @parameters t, σ_, β, ρ
+# @variables x(..), y(..), z(..)
+# Dt = Differential(t)
+# eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
+#     Dt(y(t)) ~ x(t) * (ρ - z(t)) - y(t),
+#     Dt(z(t)) ~ x(t) * y(t) - β * z(t)]
+
+# bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
+# domains = [t ∈ Interval(0.0, 1.0)]
+# dt = 0.01
+
+# input_ = length(domains)
+# n = 8
+# chain1 = Lux.Chain(Lux.Dense(input_, n, Lux.σ), Lux.Dense(n, n, Lux.σ),
+#                    Lux.Dense(n, n, Lux.σ),
+#                    Lux.Dense(n, 1))
+# chain2 = Lux.Chain(Lux.Dense(input_, n, Lux.σ), Lux.Dense(n, n, Lux.σ),
+#                    Lux.Dense(n, n, Lux.σ),
+#                    Lux.Dense(n, 1))
+# chain3 = Lux.Chain(Lux.Dense(input_, n, Lux.σ), Lux.Dense(n, n, Lux.σ),
+#                    Lux.Dense(n, n, Lux.σ),
+#                    Lux.Dense(n, 1))
+
+# function lorenz!(du, u, p, t)
+#     du[1] = 10.0 * (u[2] - u[1])
+#     du[2] = u[1] * (28.0 - u[3]) - u[2]
+#     du[3] = u[1] * u[2] - (8 / 3) * u[3]
+# end
+
+# u0 = [1.0; 0.0; 0.0]
+# tspan = (0.0, 1.0)
+# prob = ODEProblem(lorenz!, u0, tspan)
+# sol = solve(prob, Tsit5(), dt = 0.1)
+# ts = [infimum(d.domain):dt:supremum(d.domain) for d in domains][1]
+# function getData(sol)
+#     data = []
+#     us = hcat(sol(ts).u...)
+#     ts_ = hcat(sol(ts).t...)
+#     return [us, ts_]
+# end
+# data = getData(sol)
+
+# (u_, t_) = data
+# len = length(data[2])
+
+# depvars = [:x, :y, :z]
+# function additional_loss(phi, θ, p)
+#     return sum(sum(abs2, phi[i](t_, θ[depvars[i]]) .- u_[[i], :]) / len for i in 1:1:3)
+# end
+
+# discretization = NeuralPDE.PhysicsInformedNN([chain1, chain2, chain3],
+#                                              NeuralPDE.GridTraining(dt),
+#                                              param_estim = false,
+#                                              additional_loss = additional_loss)
+# @named pde_system = PDESystem(eqs, bcs, domains, [t], [x(t), y(t), z(t)], [σ_, ρ, β],
+#                               defaults = Dict([p .=> 1.0 for p in [σ_, ρ, β]]))
+# prob = NeuralPDE.discretize(pde_system, discretization)
+# callback = function (p, l)
+#     println("Current loss is: $l")
+#     return false
+# end
+# res = Optimization.solve(prob, BFGS(); callback = callback, maxiters = 5000)
+# p_ = res.u[(end - 2):end] # p_ = [9.93, 28.002, 2.667]
+
+# minimizers = [res.u.depvar[depvars[i]] for i in 1:3]
+# ts = [infimum(d.domain):(dt / 10):supremum(d.domain) for d in domains][1]
+# u_predict = [[discretization.phi[i]([t], minimizers[i])[1] for t in ts] for i in 1:3]
+# plot(sol)
+# plot!(ts, u_predict, label = ["x(t)" "y(t)" "z(t)"])
+
+# discretization.multioutput
+# discretization.chain
+# discretization.strategy
+# discretization.init_params
+# discretization.phi
+# discretization.derivative
+# discretization.param_estim
+# discretization.additional_loss
+# discretization.adaptive_loss
+# discretization.logger
+# discretization.log_options
+# discretization.iteration
+# discretization.self_increment
+# discretization.multioutput
+# discretization.kwargs
+
+# struct BNNODE1{P <: Vector{<:Distribution}}
+#     chain::Any
+#     Kernel::Any
+#     draw_samples::UInt32
+#     priorsNNw::Tuple{Float64, Float64}
+#     param::P
+#     l2std::Vector{Float64}
+#     phystd::Vector{Float64}
+
+#     function BNNODE1(chain, Kernel; draw_samples = 2000, priorsNNw = (0.0, 3.0), param = [],
+#                      l2std = [0.05], phystd = [0.05])
+#         BNNODE1(chain, Kernel, draw_samples, priorsNNw, param, l2std, phystd)
+#     end
+# end
+
+# struct BNNODE3{C, K, P <: Union{Any, Vector{<:Distribution}}}
+#     chain::C
+#     Kernel::K
+#     draw_samples::UInt32
+#     priorsNNw::Tuple{Float64, Float64}
+#     param::P
+#     l2std::Vector{Float64}
+#     phystd::Vector{Float64}
+
+#     function BNNODE3(chain, Kernel; draw_samples = 2000, priorsNNw = (0.0, 3.0), param = [],
+#                      l2std = [0.05], phystd = [0.05])
+#         new{typeof(chain), typeof(Kernel), typeof(param)}(chain, Kernel, draw_samples,
+#                                                           priorsNNw, param, l2std, phystd)
+#     end
+# end
+# linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π)
+# linear = (u, p, t) -> cos(2 * π * t)
+# tspan = (0.0, 2.0)
+# u0 = 0.0
+# prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan)
+
+# ta = range(tspan[1], tspan[2], length = 300)
+# u = [linear_analytic(u0, nothing, ti) for ti in ta]
+# sol1 = solve(prob, Tsit5())
+
+# # BPINN AND TRAINING DATASET CREATION, NN create, Reconstruct
+# x̂ = collect(Float64, Array(u) + 0.02 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂[1:100], time[1:100]]
+
+# # Call BPINN, create chain
+# chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1))
+# chainlux = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1))
+# HMC
+# solve(prob, BNNODE(chainflux, HMC))
+# BNNODE1(chainflux, HMC, 2000)
+
+# draw_samples = 2000
+# priorsNNw = (0.0, 3.0)
+# param = []
+# l2std = [0.05]
+# phystd = [0.05]
+# @time BNNODE3(chainflux, HMC, draw_samples = 2000, priorsNNw = (0.0, 3.0),
+#               param = [nothing],
+#               l2std = [0.05], phystd = [0.05])
+# typeof(Nothing) <: Vector{<:Distribution}
+# Nothing <: Distribution
+# {UnionAll} <: Distribution
+# @time [Nothing]
+# typeof([Nothing])
+# @time [1]
+
+# function test1(sum; c = 23, d = 32)
+#     return sum + c + d
+# end
+# function test(a, b; c, d)
+#     return test1(a + b, c, d)
+# end
+
+# test(2, 2)
+
+# struct BNNODE3{C, K, P <: Union{Vector{Nothing}, Vector{<:Distribution}}}
+#     chain::C
+#     Kernel::K
+#     draw_samples::Int64
+#     priorsNNw::Tuple{Float64, Float64}
+#     param::P
+#     l2std::Vector{Float64}
+#     phystd::Vector{Float64}
+
+#     function BNNODE3(chain, Kernel; draw_samples,
+#                      priorsNNw, param = [nothing], l2std, phystd)
+#         new{typeof(chain), typeof(Kernel), typeof(param)}(chain,
+#                                                           Kernel,
+#                                                           draw_samples,
+#                                                           priorsNNw,
+#                                                           param, l2std,
+#                                                           phystd)
+#     end
+# end
+
+# function solve1(prob::DiffEqBase.AbstractODEProblem, alg::BNNODE3;
+#                 dataset = [nothing], dt = 1 / 20.0,
+#                 init_params = nothing, nchains = 1,
+#                 autodiff = false, Integrator = Leapfrog,
+#                 Adaptor = StanHMCAdaptor, targetacceptancerate = 0.8,
+#                 Metric = DiagEuclideanMetric, jitter_rate = 3.0,
+#                 tempering_rate = 3.0, max_depth = 10, Δ_max = 1000,
+#                 n_leapfrog = 10, δ = 0.65, λ = 0.3, progress = true,
+#                 verbose = false)
+#     chain = alg.chain
+#     l2std = alg.l2std
+#     phystd = alg.phystd
+#     priorsNNw = alg.priorsNNw
+#     Kernel = alg.Kernel
+#     draw_samples = alg.draw_samples
+
+#     param = alg.param == [nothing] ? [] : alg.param
+#     mcmcchain, samples, statistics = ahmc_bayesian_pinn_ode(prob, chain, dataset = dataset,
+#                                                             draw_samples = draw_samples,
+#                                                             init_params = init_params,
+#                                                             physdt = dt, l2std = l2std,
+#                                                             phystd = phystd,
+#                                                             priorsNNw = priorsNNw,
+#                                                             param = param,
+#                                                             nchains = nchains,
+#                                                             autodiff = autodiff,
+#                                                             Kernel = Kernel,
+#                                                             Integrator = Integrator,
+#                                                             Adaptor = Adaptor,
+#                                                             targetacceptancerate = targetacceptancerate,
+#                                                             Metric = Metric,
+#                                                             jitter_rate = jitter_rate,
+#                                                             tempering_rate = tempering_rate,
+#                                                             max_depth = max_depth,
+#                                                             Δ_max = Δ_max,
+#                                                             n_leapfrog = n_leapfrog, δ = δ,
+#                                                             λ = λ, progress = progress,
+#                                                             verbose = verbose)
+# end
+
+# linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π)
+# linear = (u, p, t) -> cos(2 * π * t)
+# tspan = (0.0, 2.0)
+# u0 = 0.0
+# prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan)
+
+# ta = range(tspan[1], tspan[2], length = 300)
+# u = [linear_analytic(u0, nothing, ti) for ti in ta]
+# # sol1 = solve(prob, Tsit5())
+
+# # BPINN AND TRAINING DATASET CREATION, NN create, Reconstruct
+# x̂ = collect(Float64, Array(u) + 0.02 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂[1:100], time[1:100]]
+
+# # Call BPINN, create chain
+# chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1))
+# chainlux = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1))
+# HMC
+
+# solve1(prob, a)
+# a = BNNODE3(chainflux, HMC, draw_samples = 2000,
+#             priorsNNw = (0.0, 3.0),
+#             l2std = [0.05], phystd = [0.05])
+
+# Define Lotka-Volterra model.
+function lotka_volterra1(u, p, t)
+    # Model parameters.
+    α, β, γ, δ = p
+    # Current state.
+    x, y = u
+
+    # Evaluate differential equations.
+    dx = (α - β * y) * x # prey
+    dy = (δ * x - γ) * y # predator
+
+    return [dx, dy]
+end
+
+u0 = [1.0, 1.0]
+p = [1.5, 1.0, 3.0, 1.0]
+tspan = (0.0, 6.0)
+prob = ODEProblem(lotka_volterra1, u0, tspan, p)
+solution = solve(prob, Tsit5(); saveat = 0.05)
+
+as = reduce(hcat, solution.u)
+as[1, :]
+# Plot simulation.
+time = solution.t
+u = hcat(solution.u...)
+# BPINN AND TRAINING DATASET CREATION, NN create, Reconstruct
+x = u[1, :] + 0.5 * randn(length(u[1, :]))
+y = u[2, :] + 0.5 * randn(length(u[1, :]))
+dataset = [x[1:50], y[1:50], time[1:50]]
+# scatter!(time, [x, y])
+# scatter!(dataset[3], [dataset[2], dataset[1]])
+
+# NN has 2 outputs as u -> [dx,dy]
+chainlux1 = Lux.Chain(Lux.Dense(1, 6, Lux.tanh), Lux.Dense(6, 6, Lux.tanh),
+    Lux.Dense(6, 2))
+chainflux1 = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 6, tanh), Flux.Dense(6, 2))
+
+fh_mcmc_chainflux1, fhsamplesflux1, fhstatsflux1 = ahmc_bayesian_pinn_ode(prob, chainflux1,
+    dataset = dataset,
+    draw_samples = 1000,
+    l2std = [
+        0.05,
+        0.05,
+    ],
+    phystd = [
+        0.05,
+        0.05,
+    ],
+    priorsNNw = (0.0, 3.0), progress = true)
+ahmc_bayesian_pinn_ode(prob, chainflux1,
+    dataset = dataset,
+    draw_samples = 1000,
+    l2std = [
+        0.05,
+        0.05,
+    ],
+    phystd = [
+        0.05,
+        0.05,
+    ],
+    priorsNNw = (0.0, 3.0), progress = true)
+
+#     2×171 Matrix{Float64}:
+#  -0.5  -0.518956  -0.529639  …  -1.00266  -1.01049
+#   2.0   1.97109    1.92747       0.42619   0.396335
+
+#     2-element Vector{Float64}:
+#  -119451.94949911036
+#  -128543.23714618056
+
+# alg = NeuralPDE.BNNODE(chainflux1,
+#     dataset = dataset,
+#     draw_samples = 1000,
+#     l2std = [
+#         0.05,
+#         0.05,
+#     ],
+#     phystd = [
+#         0.05,
+#         0.05,
+#     ],
+#     priorsNNw = (0.0,
+#         3.0),
+#     param = [
+#         Normal(4.5,
+#             5),
+#         Normal(7,
+#             2),
+#         Normal(5,
+#             2),
+#         Normal(-4,
+#             6),
+#     ],
+#     n_leapfrog = 30, progress = true)
+
+# sol3flux_pestim = solve(prob, alg)
+
+#  ----------------------------------------------
+# original paper implementation
+# 25 points 
+run1  #7.70593 Particles{Float64, 1}
+run2 #6.66347 Particles{Float64, 1} 
+run3 #6.84827 Particles{Float64, 1} 
+
+# 50 points 
+run1 #7.83577 Particles{Float64, 1}
+run2 #6.49477 Particles{Float64, 1}
+run3 #6.47421 Particles{Float64, 1}
+
+# 100 points 
+run1 #5.96604 Particles{Float64, 1}
+run2 #6.05432 Particles{Float64, 1}
+run3 #6.08856 Particles{Float64, 1}
+
+# Full likelihood(uses total variation regularized differentiation) 
+# 25 points 
+run1 #6.41722 Particles{Float64, 1}
+run2 #6.42782 Particles{Float64, 1}
+run3 #6.42782 Particles{Float64, 1}
+
+# 50 points
+run1 #5.71268 Particles{Float64, 1}
+run2 #5.74599 Particles{Float64, 1}
+run3 #5.74599 Particles{Float64, 1}
+
+# 100 points  
+run1 #6.59097 Particles{Float64, 1}
+run2 #6.62813 Particles{Float64, 1}
+run3 #6.62813 Particles{Float64, 1}
+
+using Plots, StatsPlots
+function lotka_volterra(u, p, t)
+    # Model parameters.
+    α, β, γ, δ = p
+    # Current state.
+    x, y = u
+
+    # Evaluate differential equations.
+    dx = (α - β * y) * x # prey
+    dy = (δ * x - γ) * y # predator
+
+    return [dx, dy]
+end
+
+# initial-value problem.
+u0 = [1.0, 1.0]
+p = [1.5, 1.0, 3.0, 1.0]
+tspan = (0.0, 6.0)
+prob = ODEProblem(lotka_volterra, u0, tspan, p)
+
+# Plot simulation.
+
+solution = solve(prob, Tsit5(); saveat = 0.05)
+plot(solve(prob, Tsit5()))
+
+# Dataset creation for parameter estimation
+time = solution.t
+u = hcat(solution.u...)
+x = u[1, :] + 0.5 * randn(length(u[1, :]))
+y = u[2, :] + 0.5 * randn(length(u[1, :]))
+dataset = [x, y, time]
+
+# Neural Networks must have 2 outputs as u -> [dx,dy] in function lotka_volterra()
+chainflux = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 6, tanh), Flux.Dense(6, 2)) |>
+            Flux.f64
+
+chainlux = Lux.Chain(Lux.Dense(1, 6, Lux.tanh), Lux.Dense(6, 6, Lux.tanh), Lux.Dense(6, 2))
+
+alg1 = NeuralPDE.BNNODE(chainflux,
+    dataset = dataset,
+    draw_samples = 1000,
+    l2std = [
+        0.01,
+        0.01,
+    ],
+    phystd = [
+        0.01,
+        0.01,
+    ],
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol_flux_pestim = solve(prob, alg1)
+
+# Dataset not needed as we are solving the equation with ideal parameters
+alg2 = NeuralPDE.BNNODE(chainlux,
+    draw_samples = 1000,
+    l2std = [
+        0.05,
+        0.05,
+    ],
+    phystd = [
+        0.05,
+        0.05,
+    ],
+    priorsNNw = (0.0,
+        3.0),
+    n_leapfrog = 30, progress = true)
+
+sol_lux = solve(prob, alg2)
+
+#testing timepoints must match keyword arg `saveat`` timepoints of solve() call
+t = collect(Float64, prob.tspan[1]:(1 / 50.0):prob.tspan[2])
+
+# plotting solution for x,y for chain_flux
+plot(t, sol_flux_pestim.ensemblesol[1])
+plot!(t, sol_flux_pestim.ensemblesol[2])
+
+plot(sol_flux_pestim.ens1mblesol[1])
+plot!(sol_flux_pestim.ensemblesol[2])
+
+# estimated ODE parameters by .estimated_ode_params, weights and biases by .estimated_nn_params
+sol_flux_pestim.estimated_nn_params
+sol_flux_pestim.estimated_ode_params
+
+# plotting solution for x,y for chain_lux
+plot(t, sol_lux.ensemblesol[1])
+plot!(t, sol_lux.ensemblesol[2])
+
+# estimated weights and biases by .estimated_nn_params for chain_lux
+sol_lux.estimated_nn_params
+
+# # ----------------------------------stats-----------------------------
+# #   ----------------------------
+# # -----------------------------
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -3118.0639515039957
+# Sampling 100%|███████████████████████████████| Time: 0:04:47 
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -3118.0639515039957
+# Sampling 100%|███████████████████████████████| Time: 0:03:38 
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -3118.0639515039957
+# Sampling 100%|███████████████████████████████| Time: 0:04:12 
+# #   --------------------------
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -6242.351071278482
+# Sampling 100%|███████████████████████████████| Time: 0:05:09 
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -6242.351071278482
+# Sampling 100%|███████████████████████████████| Time: 0:04:47 
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -6242.351071278482
+# Sampling 100%|███████████████████████████████| Time: 0:04:25 
+# #   --------------
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -12497.32657780532
+# Sampling 100%|███████████████████████████████| Time: 0:06:47 
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -12497.32657780532
+# Sampling 100%|███████████████████████████████| Time: 0:05:54
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -12497.32657780532
+# Sampling 100%|███████████████████████████████| Time: 0:05:46
+# # ------------------------
+# # -----------------------
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -882.2934218498742
+# L2loss2 Logpdf is : -3118.0639515039957
+# Sampling 100%|███████████████████████████████| Time: 0:04:06
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -882.2934218498742
+# L2loss2 Logpdf is : -3118.0639515039957
+# Sampling 100%|███████████████████████████████| Time: 0:03:32
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -882.2934218498742
+# L2loss2 Logpdf is : -3118.0639515039957
+# Sampling 100%|███████████████████████████████| Time: 0:03:01 
+# # --------------------------
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1411.1717435511828
+# L2loss2 Logpdf is : -6242.351071278482
+# Sampling 100%|███████████████████████████████| Time: 0:04:02
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1411.1717435511828
+# L2loss2 Logpdf is : -6242.351071278482
+# Sampling 100%|███████████████████████████████| Time: 0:04:08
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1411.1717435511828
+# L2loss2 Logpdf is : -6242.351071278482
+# Sampling 100%|███████████████████████████████| Time: 0:04:15
+# # ----------------------------
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -3240.067149411982
+# L2loss2 Logpdf is : -12497.32657780532
+# Sampling 100%|███████████████████████████████| Time: 0:05:37
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -3240.067149411982
+# L2loss2 Logpdf is : -12497.32657780532
+# Sampling 100%|███████████████████████████████| Time: 0:06:02
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -3240.067149411982
+# L2loss2 Logpdf is : -12497.32657780532
+# Sampling 100%|███████████████████████████████| Time: 0:06:13
+
+using NeuralPDE, Lux, ModelingToolkit, Optimization, OptimizationOptimJL
+import ModelingToolkit: Interval, infimum, supremum
+
+using NeuralPDE, Flux, OptimizationOptimisers
+
+function diffeq(u, p, t)
+    u1, u2 = u
+    return [u2, p[1] + p[2] * sin(u1) + p[3] * u2]
+end
+p = [5, -10, -1.7]
+u0 = [-1.0, 7.0]
+tspan = (0.0, 10.0)
+prob = ODEProblem(ODEFunction(diffeq), u0, tspan, p)
+
+chainnew = Flux.Chain(Flux.Dense(1, 8, tanh), Flux.Dense(8, 8, tanh), Flux.Dense(8, 2)) |>
+           Flux.f64
+
+opt = OptimizationOptimisers.Adam(0.1)
+opt = Optimisers.ADAGrad(0.1)
+opt = Optimisers.AdaMax(0.01)
+algnew = NeuralPDE.NNODE(chainnew, opt)
+solution_new = solve(prob, algnew, verbose = true,
+    abstol = 1e-10, maxiters = 7000)
+u = reduce(hcat, solution_new.u)
+plot(solution_new.t, u[1, :])
+plot!(solution_new.t, u[2, :])
+
+algnew = NeuralPDE.BNNODE(chainnew, draw_samples = 200,
+    n_leapfrog = 30, progress = true)
+solution_new = solve(prob, algnew)
+
+@parameters t
+@variables u1(..), u2(..)
+D = Differential(t)
+eq = [D(u1(t)) ~ u2(t),
+    D(u2(t)) ~ 5 - 10 * sin(u1(t)) - 1.7 * u2(t)];
+
+import ModelingToolkit: Interval
+bcs = [u1(0) ~ -1, u2(0) ~ 7]
+domains = [t ∈ Interval(0.0, 10.0)]
+dt = 0.01
+
+input_ = length(domains) # number of dimensions
+n = 16
+chain = [Lux.Chain(Lux.Dense(input_, n, Lux.σ), Lux.Dense(n, n, Lux.σ), Lux.Dense(n, 1))
+         for _ in 1:2]
+
+@named pde_system = PDESystem(eq, bcs, domains, [t], [u1(t), u2(t)])
+
+strategy = NeuralPDE.GridTraining(dt)
+discretization = PhysicsInformedNN(chain, strategy)
+sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization)
+
+pde_loss_functions = sym_prob.loss_functions.pde_loss_functions
+bc_loss_functions = sym_prob.loss_functions.bc_loss_functions
+
+callback = function (p, l)
+    println("loss: ", l)
+    # println("pde_losses: ", map(l_ -> l_(p), pde_loss_functions))
+    # println("bcs_losses: ", map(l_ -> l_(p), bc_loss_functions))
+    return false
+end
+
+loss_functions = [pde_loss_functions; bc_loss_functions]
+
+function loss_function(θ, p)
+    sum(map(l -> l(θ), loss_functions))
+end
+
+f_ = OptimizationFunction(loss_function, Optimization.AutoZygote())
+prob = Optimization.OptimizationProblem(f_, sym_prob.flat_init_params)
+
+res = Optimization.solve(prob,
+    OptimizationOptimJL.BFGS();
+    callback = callback,
+    maxiters = 1000)
+phi = discretization.phi
\ No newline at end of file

From b2f3ac1e4aeea8350005113fb8dd9f16bdc06421 Mon Sep 17 00:00:00 2001
From: Vaibhav Dixit <vaibhavyashdixit@gmail.com>
Date: Thu, 19 Oct 2023 17:48:04 -0400
Subject: [PATCH 002/107] Put new loglikelihood behind a conditional

---
 src/NeuralPDE.jl                       |   4 +-
 src/{ => bayesian}/BPINN_ode.jl        |   6 +-
 src/{ => bayesian}/advancedHMC_MCMC.jl |  22 ++-
 src/bayesian/collocated_estim.jl       | 194 +++++++++++++++++++++++++
 test/bpinnexperimental.jl              |  66 +++++++++
 5 files changed, 281 insertions(+), 11 deletions(-)
 rename src/{ => bayesian}/BPINN_ode.jl (98%)
 rename src/{ => bayesian}/advancedHMC_MCMC.jl (97%)
 create mode 100644 src/bayesian/collocated_estim.jl
 create mode 100644 test/bpinnexperimental.jl

diff --git a/src/NeuralPDE.jl b/src/NeuralPDE.jl
index 945093ea04..e38fca98d4 100644
--- a/src/NeuralPDE.jl
+++ b/src/NeuralPDE.jl
@@ -50,8 +50,8 @@ include("rode_solve.jl")
 include("transform_inf_integral.jl")
 include("discretize.jl")
 include("neural_adapter.jl")
-include("advancedHMC_MCMC.jl")
-include("BPINN_ode.jl")
+include("bayesian/advancedHMC_MCMC.jl")
+include("bayesian/BPINN_ode.jl")
 
 export NNODE, TerminalPDEProblem, NNPDEHan, NNPDENS, NNRODE,
        KolmogorovPDEProblem, NNKolmogorov, NNStopping, ParamKolmogorovPDEProblem,
diff --git a/src/BPINN_ode.jl b/src/bayesian/BPINN_ode.jl
similarity index 98%
rename from src/BPINN_ode.jl
rename to src/bayesian/BPINN_ode.jl
index da49640314..5c26329f14 100644
--- a/src/BPINN_ode.jl
+++ b/src/bayesian/BPINN_ode.jl
@@ -178,7 +178,8 @@ function DiffEqBase.__solve(prob::DiffEqBase.ODEProblem,
     verbose = false,
     saveat = 1 / 50.0,
     maxiters = nothing,
-    numensemble = floor(Int, alg.draw_samples / 3))
+    numensemble = floor(Int, alg.draw_samples / 3),
+    estim_collocate = false)
     @unpack chain, l2std, phystd, param, priorsNNw, Kernel, strategy,
     draw_samples, dataset, init_params,
     nchains, physdt, Adaptorkwargs, Integratorkwargs,
@@ -207,7 +208,8 @@ function DiffEqBase.__solve(prob::DiffEqBase.ODEProblem,
         Integratorkwargs = Integratorkwargs,
         MCMCkwargs = MCMCkwargs,
         progress = progress,
-        verbose = verbose)
+        verbose = verbose,
+        estim_collocate = estim_collocate)
 
     fullsolution = BPINNstats(mcmcchain, samples, statistics)
     ninv = length(param)
diff --git a/src/advancedHMC_MCMC.jl b/src/bayesian/advancedHMC_MCMC.jl
similarity index 97%
rename from src/advancedHMC_MCMC.jl
rename to src/bayesian/advancedHMC_MCMC.jl
index 6032c7ca21..6b6b3303e7 100644
--- a/src/advancedHMC_MCMC.jl
+++ b/src/bayesian/advancedHMC_MCMC.jl
@@ -16,11 +16,12 @@ mutable struct LogTargetDensity{C, S, ST <: AbstractTrainingStrategy, I,
     physdt::Float64
     extraparams::Int
     init_params::I
+    estim_collocate::Bool
 
     function LogTargetDensity(dim, prob, chain::Optimisers.Restructure, st, strategy,
         dataset,
         priors, phystd, l2std, autodiff, physdt, extraparams,
-        init_params::AbstractVector)
+        init_params::AbstractVector, estim_collocate)
         new{
             typeof(chain),
             Nothing,
@@ -39,12 +40,13 @@ mutable struct LogTargetDensity{C, S, ST <: AbstractTrainingStrategy, I,
             autodiff,
             physdt,
             extraparams,
-            init_params)
+            init_params,
+            estim_collocate)
     end
     function LogTargetDensity(dim, prob, chain::Lux.AbstractExplicitLayer, st, strategy,
         dataset,
         priors, phystd, l2std, autodiff, physdt, extraparams,
-        init_params::NamedTuple)
+        init_params::NamedTuple, estim_collocate)
         new{
             typeof(chain),
             typeof(st),
@@ -60,7 +62,8 @@ mutable struct LogTargetDensity{C, S, ST <: AbstractTrainingStrategy, I,
             autodiff,
             physdt,
             extraparams,
-            init_params)
+            init_params,
+            estim_collocate)
     end
 end
 
@@ -79,7 +82,11 @@ function vector_to_parameters(ps_new::AbstractVector, ps::NamedTuple)
 end
 
 function LogDensityProblems.logdensity(Tar::LogTargetDensity, θ)
-    return physloglikelihood(Tar, θ) + priorweights(Tar, θ) + L2LossData(Tar, θ)
+    if Tar.estim_collocate
+        return physloglikelihood(Tar, θ) + priorweights(Tar, θ) + L2LossData(Tar, θ) + L2loss2(Tar, θ)
+    else
+        return physloglikelihood(Tar, θ) + priorweights(Tar, θ) + L2LossData(Tar, θ)
+    end
 end
 
 LogDensityProblems.dimension(Tar::LogTargetDensity) = Tar.dim
@@ -481,7 +488,8 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain;
         Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
     Integratorkwargs = (Integrator = Leapfrog,),
     MCMCkwargs = (n_leapfrog = 30,),
-    progress = false, verbose = false)
+    progress = false, verbose = false,
+    estim_collocate = false)
 
     # NN parameter prior mean and variance(PriorsNN must be a tuple)
     if isinplace(prob)
@@ -542,7 +550,7 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain;
     t0 = prob.tspan[1]
     # dimensions would be total no of params,initial_nnθ for Lux namedTuples
     ℓπ = LogTargetDensity(nparameters, prob, recon, st, strategy, dataset, priors,
-        phystd, l2std, autodiff, physdt, ninv, initial_nnθ)
+        phystd, l2std, autodiff, physdt, ninv, initial_nnθ, estim_collocate)
 
     try
         ℓπ(t0, initial_θ[1:(nparameters - ninv)])
diff --git a/src/bayesian/collocated_estim.jl b/src/bayesian/collocated_estim.jl
new file mode 100644
index 0000000000..157388194e
--- /dev/null
+++ b/src/bayesian/collocated_estim.jl
@@ -0,0 +1,194 @@
+# suggested extra loss function
+function L2loss2(Tar::LogTargetDensity, θ)
+    f = Tar.prob.f
+
+    # parameter estimation chosen or not
+    if Tar.extraparams > 0
+        dataset, deri_sol = Tar.dataset
+        # deri_sol = deri_sol'
+        autodiff = Tar.autodiff
+
+        # # Timepoints to enforce Physics
+        # dataset = Array(reduce(hcat, dataset)')
+        # t = dataset[end, :]
+        # û = dataset[1:(end - 1), :]
+
+        # ode_params = Tar.extraparams == 1 ?
+        #              θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
+        #              θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
+
+        # if length(û[:, 1]) == 1
+        #     physsol = [f(û[:, i][1],
+        #         ode_params,
+        #         t[i])
+        #                for i in 1:length(û[1, :])]
+        # else
+        #     physsol = [f(û[:, i],
+        #         ode_params,
+        #         t[i])
+        #                for i in 1:length(û[1, :])]
+        # end
+        # #form of NN output matrix output dim x n
+        # deri_physsol = reduce(hcat, physsol)
+
+        # > for perfect deriv(basically gradient matching in case of an ODEFunction)
+        # in case of PDE or general ODE we would want to reduce residue of f(du,u,p,t)
+        # if length(û[:, 1]) == 1
+        #     deri_sol = [f(û[:, i][1],
+        #         Tar.prob.p,
+        #         t[i])
+        #                 for i in 1:length(û[1, :])]
+        # else
+        #     deri_sol = [f(û[:, i],
+        #         Tar.prob.p,
+        #         t[i])
+        #                 for i in 1:length(û[1, :])]
+        # end
+        # deri_sol = reduce(hcat, deri_sol) 
+        # deri_sol = reduce(hcat, derivatives)
+
+        # Timepoints to enforce Physics 
+        t = dataset[end]
+        u1 = dataset[2]
+        û = dataset[1]
+        # Tar(t, θ[1:(length(θ) - Tar.extraparams)])'
+        #  
+
+        nnsol = NNodederi(Tar, t, θ[1:(length(θ) - Tar.extraparams)], autodiff)
+
+        ode_params = Tar.extraparams == 1 ?
+                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
+                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
+
+        if length(Tar.prob.u0) == 1
+            physsol = [f(û[i],
+                ode_params,
+                t[i])
+                       for i in 1:length(û[:, 1])]
+        else
+            physsol = [f([û[i], u1[i]],
+                ode_params,
+                t[i])
+                       for i in 1:length(û[:, 1])]
+        end
+        #form of NN output matrix output dim x n 
+        deri_physsol = reduce(hcat, physsol)
+
+        # if length(Tar.prob.u0) == 1
+        #     nnsol = [f(û[i],
+        #         Tar.prob.p,
+        #         t[i])
+        #              for i in 1:length(û[:, 1])]
+        # else
+        #     nnsol = [f([û[i], u1[i]],
+        #         Tar.prob.p,
+        #         t[i])
+        #              for i in 1:length(û[:, 1])]
+        # end
+        # form of NN output matrix output dim x n
+        # nnsol = reduce(hcat, nnsol)
+
+        # > Instead of dataset gradients trying NN derivatives with dataset collocation 
+        # # convert to matrix as nnsol  
+
+        physlogprob = 0
+        for i in 1:length(Tar.prob.u0)
+            # can add phystd[i] for u[i] 
+            physlogprob += logpdf(MvNormal(deri_physsol[i, :],
+                    LinearAlgebra.Diagonal(map(abs2,
+                        (Tar.l2std[i] * 4.0) .*
+                        ones(length(nnsol[i, :]))))),
+                nnsol[i, :])
+        end
+        return physlogprob
+    else
+        return 0
+    end
+end
+
+# PDE(DU,U,P,T)=0
+
+# Derivated via Central Diff
+# function calculate_derivatives2(dataset)
+#     x̂, time = dataset
+#     num_points = length(x̂)
+#     # Initialize an array to store the derivative values.
+#     derivatives = similar(x̂)
+
+#     for i in 2:(num_points - 1)
+#         # Calculate the first-order derivative using central differences.
+#         Δt_forward = time[i + 1] - time[i]
+#         Δt_backward = time[i] - time[i - 1]
+
+#         derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
+
+#         derivatives[i] = derivative
+#     end
+
+#     # Derivatives at the endpoints can be calculated using forward or backward differences.
+#     derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
+#     derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
+#     return derivatives
+# end
+
+function calderivatives(prob, dataset)
+    chainflux = Flux.Chain(Flux.Dense(1, 8, tanh), Flux.Dense(8, 8, tanh),
+        Flux.Dense(8, 2)) |> Flux.f64
+    # chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64
+    function loss(x, y)
+        # sum(Flux.mse.(prob.u0[1] .+ (prob.tspan[2] .- x)' .* chainflux(x)[1, :], y[1]) +
+        #     Flux.mse.(prob.u0[2] .+ (prob.tspan[2] .- x)' .* chainflux(x)[2, :], y[2]))
+        # sum(Flux.mse.(prob.u0[1] .+ (prob.tspan[2] .- x)' .* chainflux(x)[1, :], y[1]))
+        sum(Flux.mse.(chainflux(x), y))
+    end
+    optimizer = Flux.Optimise.ADAM(0.01)
+    epochs = 3000
+    for epoch in 1:epochs
+        Flux.train!(loss,
+            Flux.params(chainflux),
+            [(dataset[end]', dataset[1:(end - 1)])],
+            optimizer)
+    end
+
+    # A1 = (prob.u0' .+
+    #   (prob.tspan[2] .- (dataset[end]' .+ sqrt(eps(eltype(Float64)))))' .*
+    #   chainflux(dataset[end]' .+ sqrt(eps(eltype(Float64))))')
+
+    # A2 = (prob.u0' .+
+    #       (prob.tspan[2] .- (dataset[end]'))' .*
+    #       chainflux(dataset[end]')')
+
+    A1 = chainflux(dataset[end]' .+ sqrt(eps(eltype(dataset[end][1]))))
+    A2 = chainflux(dataset[end]')
+
+    gradients = (A2 .- A1) ./ sqrt(eps(eltype(dataset[end][1])))
+
+    return gradients
+end
+
+function calculate_derivatives(dataset)
+
+    # u = dataset[1]
+    # u1 = dataset[2]
+    # t = dataset[end]
+    # # control points
+    # n = Int(floor(length(t) / 10))
+    # # spline for datasetvalues(solution) 
+    # # interp = BSplineApprox(u, t, 4, 10, :Uniform, :Uniform)
+    # interp = CubicSpline(u, t)
+    # interp1 = CubicSpline(u1, t)
+    # # derrivatives interpolation
+    # dx = t[2] - t[1]
+    # time = collect(t[1]:dx:t[end])
+    # smoothu = [interp(i) for i in time]
+    # smoothu1 = [interp1(i) for i in time]
+    # # derivative of the spline (must match function derivative) 
+    # û = tvdiff(smoothu, 20, 0.5, dx = dx, ε = 1)
+    # û1 = tvdiff(smoothu1, 20, 0.5, dx = dx, ε = 1)
+    # # tvdiff(smoothu, 100, 0.035, dx = dx, ε = 1)
+    # # FDM
+    # # û1 = diff(u) / dx
+    # # dataset[1] and smoothu are almost equal(rounding errors)
+    # return [û, û1] 
+
+end
\ No newline at end of file
diff --git a/test/bpinnexperimental.jl b/test/bpinnexperimental.jl
new file mode 100644
index 0000000000..153124b069
--- /dev/null
+++ b/test/bpinnexperimental.jl
@@ -0,0 +1,66 @@
+using Test, MCMCChains
+using ForwardDiff, Distributions, OrdinaryDiffEq
+using Flux, OptimizationOptimisers, AdvancedHMC, Lux
+using Statistics, Random, Functors, ComponentArrays
+using NeuralPDE, MonteCarloMeasurements
+
+Random.seed!(110)
+
+using NeuralPDE, Lux, Plots, OrdinaryDiffEq, Distributions, Random
+
+function lotka_volterra(u, p, t)
+    # Model parameters.
+    α, β, γ, δ = p
+    # Current state.
+    x, y = u
+
+    # Evaluate differential equations.
+    dx = (α - β * y) * x # prey
+    dy = (δ * x - γ) * y # predator
+
+    return [dx, dy]
+end
+
+# initial-value problem.
+u0 = [1.0, 1.0]
+p = [1.5, 1.0, 3.0, 1.0]
+tspan = (0.0, 4.0)
+prob = ODEProblem(lotka_volterra, u0, tspan, p)
+
+# Solve using OrdinaryDiffEq.jl solver
+dt = 0.01
+solution = solve(prob, Tsit5(); saveat = dt)
+
+times = solution.t
+u = hcat(solution.u...)
+x = u[1, :] + (u[1, :]) .* (0.05 .* randn(length(u[1, :])))
+y = u[2, :] + (u[2, :]) .* (0.05 .* randn(length(u[2, :])))
+dataset = [x, y, times]
+
+plot(times, x, label = "noisy x")
+plot!(times, y, label = "noisy y")
+plot!(solution, labels = ["x" "y"])
+
+chain = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),
+    Lux.Dense(6, 2))
+
+alg = BNNODE(chain;
+dataset = dataset,
+draw_samples = 1000,
+l2std = [0.1, 0.1],
+phystd = [0.1, 0.1],
+priorsNNw = (0.0, 3.0),
+param = [
+    Normal(1, 2),
+    Normal(2, 2),
+    Normal(2, 2),
+    Normal(0, 2)], progress = false)
+
+sol_pestim = solve(prob, alg; saveat = dt)
+plot(times, sol_pestim.ensemblesol[1], label = "estimated x")
+plot!(times, sol_pestim.ensemblesol[2], label = "estimated y")
+
+# comparing it with the original solution
+plot!(solution, labels = ["true x" "true y"])
+
+sol_pestim.estimated_ode_params
\ No newline at end of file

From 058aa05eeb5dc434c825c30115b8ea7fd2d733ca Mon Sep 17 00:00:00 2001
From: Vaibhav Dixit <vaibhavyashdixit@gmail.com>
Date: Fri, 27 Oct 2023 16:58:42 -0400
Subject: [PATCH 003/107] fitzhughnagumo experiment and some edits

---
 src/NeuralPDE.jl                 |  1 +
 src/bayesian/advancedHMC_MCMC.jl |  6 +--
 src/bayesian/collocated_estim.jl | 10 ++---
 test/bpinnexperimental.jl        | 68 ++++++++++++++++++++++++++++----
 4 files changed, 68 insertions(+), 17 deletions(-)

diff --git a/src/NeuralPDE.jl b/src/NeuralPDE.jl
index e38fca98d4..edfaf9664a 100644
--- a/src/NeuralPDE.jl
+++ b/src/NeuralPDE.jl
@@ -52,6 +52,7 @@ include("discretize.jl")
 include("neural_adapter.jl")
 include("bayesian/advancedHMC_MCMC.jl")
 include("bayesian/BPINN_ode.jl")
+include("bayesian/collocated_estim.jl")
 
 export NNODE, TerminalPDEProblem, NNPDEHan, NNPDENS, NNRODE,
        KolmogorovPDEProblem, NNKolmogorov, NNStopping, ParamKolmogorovPDEProblem,
diff --git a/src/bayesian/advancedHMC_MCMC.jl b/src/bayesian/advancedHMC_MCMC.jl
index 6b6b3303e7..740bb344a3 100644
--- a/src/bayesian/advancedHMC_MCMC.jl
+++ b/src/bayesian/advancedHMC_MCMC.jl
@@ -587,8 +587,8 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain;
 
             MCMC_alg = kernelchoice(Kernel, MCMCkwargs)
             Kernel = AdvancedHMC.make_kernel(MCMC_alg, integrator)
-            samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples, adaptor;
-                progress = progress, verbose = verbose)
+            samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples, adaptor, draw_samples;
+                progress = progress, verbose = verbose, drop_warmup = true)
 
             samplesc[i] = samples
             statsc[i] = stats
@@ -606,7 +606,7 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain;
         MCMC_alg = kernelchoice(Kernel, MCMCkwargs)
         Kernel = AdvancedHMC.make_kernel(MCMC_alg, integrator)
         samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples,
-            adaptor; progress = progress, verbose = verbose)
+            adaptor, draw_samples; progress = progress, verbose = verbose, drop_warmup = true)
 
         # return a chain(basic chain),samples and stats
         matrix_samples = hcat(samples...)
diff --git a/src/bayesian/collocated_estim.jl b/src/bayesian/collocated_estim.jl
index 157388194e..b113b76f12 100644
--- a/src/bayesian/collocated_estim.jl
+++ b/src/bayesian/collocated_estim.jl
@@ -4,10 +4,8 @@ function L2loss2(Tar::LogTargetDensity, θ)
 
     # parameter estimation chosen or not
     if Tar.extraparams > 0
-        dataset, deri_sol = Tar.dataset
         # deri_sol = deri_sol'
         autodiff = Tar.autodiff
-
         # # Timepoints to enforce Physics
         # dataset = Array(reduce(hcat, dataset)')
         # t = dataset[end, :]
@@ -48,9 +46,9 @@ function L2loss2(Tar::LogTargetDensity, θ)
         # deri_sol = reduce(hcat, derivatives)
 
         # Timepoints to enforce Physics 
-        t = dataset[end]
-        u1 = dataset[2]
-        û = dataset[1]
+        t = Tar.dataset[end]
+        u1 = Tar.dataset[2]
+        û = Tar.dataset[1]
         # Tar(t, θ[1:(length(θ) - Tar.extraparams)])'
         #  
 
@@ -69,7 +67,7 @@ function L2loss2(Tar::LogTargetDensity, θ)
             physsol = [f([û[i], u1[i]],
                 ode_params,
                 t[i])
-                       for i in 1:length(û[:, 1])]
+                       for i in 1:length(û)]
         end
         #form of NN output matrix output dim x n 
         deri_physsol = reduce(hcat, physsol)
diff --git a/test/bpinnexperimental.jl b/test/bpinnexperimental.jl
index 153124b069..ffe7fcf0f8 100644
--- a/test/bpinnexperimental.jl
+++ b/test/bpinnexperimental.jl
@@ -28,13 +28,13 @@ tspan = (0.0, 4.0)
 prob = ODEProblem(lotka_volterra, u0, tspan, p)
 
 # Solve using OrdinaryDiffEq.jl solver
-dt = 0.01
+dt = 0.2
 solution = solve(prob, Tsit5(); saveat = dt)
 
 times = solution.t
 u = hcat(solution.u...)
-x = u[1, :] + (u[1, :]) .* (0.05 .* randn(length(u[1, :])))
-y = u[2, :] + (u[2, :]) .* (0.05 .* randn(length(u[2, :])))
+x = u[1, :] + (u[1, :]) .* (0.3 .* randn(length(u[1, :])))
+y = u[2, :] + (u[2, :]) .* (0.3 .* randn(length(u[2, :])))
 dataset = [x, y, times]
 
 plot(times, x, label = "noisy x")
@@ -54,13 +54,65 @@ param = [
     Normal(1, 2),
     Normal(2, 2),
     Normal(2, 2),
-    Normal(0, 2)], progress = false)
+    Normal(0, 2)], progress = true)
 
-sol_pestim = solve(prob, alg; saveat = dt)
-plot(times, sol_pestim.ensemblesol[1], label = "estimated x")
-plot!(times, sol_pestim.ensemblesol[2], label = "estimated y")
+@time sol_pestim1 = solve(prob, alg; saveat = dt,)
+@time sol_pestim2 = solve(prob, alg; estim_collocate = true, saveat = dt)
+plot(times, sol_pestim1.ensemblesol[1], label = "estimated x1")
+plot!(times, sol_pestim2.ensemblesol[1], label = "estimated x2")
+plot!(times, sol_pestim1.ensemblesol[2], label = "estimated y1")
+plot!(times, sol_pestim2.ensemblesol[2], label = "estimated y2")
 
 # comparing it with the original solution
 plot!(solution, labels = ["true x" "true y"])
 
-sol_pestim.estimated_ode_params
\ No newline at end of file
+@show sol_pestim1.estimated_ode_params
+@show sol_pestim2.estimated_ode_params
+
+function fitz(u, p , t)
+    v, w = u[1], u[2]
+    a,b,τinv,l = p[1], p[2], p[3], p[4]
+    
+    dv = v - 0.33*v^3 -w + l
+    dw = τinv*(v +  a - b*w)
+
+    return [dv, dw]
+end
+
+prob_ode_fitzhughnagumo = ODEProblem(fitz, [1.0,1.0], (0.0,10.0), [0.7,0.8,1/12.5,0.5])
+dt = 0.5
+sol = solve(prob_ode_fitzhughnagumo, Tsit5(), saveat = dt)
+
+sig = 0.20
+data = Array(sol)
+dataset = [data[1,:] .+ (sig .* rand(length(sol.t))), data[2, :] .+ (sig .* rand(length(sol.t))), sol.t]
+priors = [truncated(Normal(0.5,1.0),0,1.5), truncated(Normal(0.5,1.0),0,1.5), truncated(Normal(0.0,0.5),0.0,0.5), truncated(Normal(0.5,1.0),0,1)]
+
+
+plot(sol.t, dataset[1], label = "noisy x")
+plot!(sol.t, dataset[2], label = "noisy y")
+plot!(sol, labels = ["x" "y"])
+
+chain = Lux.Chain(Lux.Dense(1, 10, tanh), Lux.Dense(10, 10, tanh),
+    Lux.Dense(10, 2))
+
+Adaptorkwargs = (Adaptor = AdvancedHMC.StanHMCAdaptor,
+    Metric = AdvancedHMC.DiagEuclideanMetric, targetacceptancerate = 0.65)
+alg = BNNODE(chain;
+dataset = dataset,
+draw_samples = 10000,
+l2std = [0.1, 0.1],
+phystd = [0.1, 0.1],
+priorsNNw = (0.01, 3.0),
+Adaptorkwargs = Adaptorkwargs,
+param = priors, progress = true)
+
+@time sol_pestim1 = solve(prob_ode_fitzhughnagumo, alg; saveat = dt)
+@time sol_pestim2 = solve(prob_ode_fitzhughnagumo, alg; estim_collocate = true, saveat = dt)
+plot!(sol.t, sol_pestim1.ensemblesol[1], label = "estimated x1")
+plot!(sol.t, sol_pestim2.ensemblesol[1], label = "estimated x2")
+plot!(sol.t, sol_pestim1.ensemblesol[2], label = "estimated y1")
+plot!(sol.t, sol_pestim2.ensemblesol[2], label = "estimated y2")
+
+@show sol_pestim1.estimated_ode_params
+@show sol_pestim2.estimated_ode_params
\ No newline at end of file

From 103e1febf7f0d4153b560ad8a962106c6bf92cde Mon Sep 17 00:00:00 2001
From: Vaibhav Dixit <vaibhavyashdixit@gmail.com>
Date: Sat, 28 Oct 2023 15:14:31 -0400
Subject: [PATCH 004/107] Scale logpdfs and fix chain creation

---
 src/bayesian/BPINN_ode.jl        |  6 +++++-
 src/bayesian/advancedHMC_MCMC.jl | 13 ++++++-------
 test/bpinnexperimental.jl        | 22 +++++++++++-----------
 3 files changed, 22 insertions(+), 19 deletions(-)

diff --git a/src/bayesian/BPINN_ode.jl b/src/bayesian/BPINN_ode.jl
index 5c26329f14..a2cce9db34 100644
--- a/src/bayesian/BPINN_ode.jl
+++ b/src/bayesian/BPINN_ode.jl
@@ -217,8 +217,12 @@ function DiffEqBase.__solve(prob::DiffEqBase.ODEProblem,
 
     if chain isa Lux.AbstractExplicitLayer
         θinit, st = Lux.setup(Random.default_rng(), chain)
+        println(length(θinit))
+        println(length(samples[1]))
+        println(draw_samples)
         θ = [vector_to_parameters(samples[i][1:(end - ninv)], θinit)
-             for i in (draw_samples - numensemble):draw_samples]
+             for i in 1:max(draw_samples - draw_samples ÷ 10, draw_samples - 1000)]
+        
         luxar = [chain(t', θ[i], st)[1] for i in 1:numensemble]
         # only need for size
         θinit = collect(ComponentArrays.ComponentArray(θinit))
diff --git a/src/bayesian/advancedHMC_MCMC.jl b/src/bayesian/advancedHMC_MCMC.jl
index 740bb344a3..5e995ebfdb 100644
--- a/src/bayesian/advancedHMC_MCMC.jl
+++ b/src/bayesian/advancedHMC_MCMC.jl
@@ -83,9 +83,9 @@ end
 
 function LogDensityProblems.logdensity(Tar::LogTargetDensity, θ)
     if Tar.estim_collocate
-        return physloglikelihood(Tar, θ) + priorweights(Tar, θ) + L2LossData(Tar, θ) + L2loss2(Tar, θ)
+        return physloglikelihood(Tar, θ)/length(Tar.dataset[1]) + priorweights(Tar, θ) + L2LossData(Tar, θ)/length(Tar.dataset[1]) + L2loss2(Tar, θ)/length(Tar.dataset[1])
     else
-        return physloglikelihood(Tar, θ) + priorweights(Tar, θ) + L2LossData(Tar, θ)
+        return physloglikelihood(Tar, θ)/length(Tar.dataset[1]) + priorweights(Tar, θ) + L2LossData(Tar, θ)/length(Tar.dataset[1])
     end
 end
 
@@ -587,7 +587,7 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain;
 
             MCMC_alg = kernelchoice(Kernel, MCMCkwargs)
             Kernel = AdvancedHMC.make_kernel(MCMC_alg, integrator)
-            samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples, adaptor, draw_samples;
+            samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples, adaptor;
                 progress = progress, verbose = verbose, drop_warmup = true)
 
             samplesc[i] = samples
@@ -606,11 +606,10 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain;
         MCMC_alg = kernelchoice(Kernel, MCMCkwargs)
         Kernel = AdvancedHMC.make_kernel(MCMC_alg, integrator)
         samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples,
-            adaptor, draw_samples; progress = progress, verbose = verbose, drop_warmup = true)
-
+            adaptor; progress = progress, verbose = verbose, drop_warmup = true)
         # return a chain(basic chain),samples and stats
-        matrix_samples = hcat(samples...)
-        mcmc_chain = MCMCChains.Chains(matrix_samples')
+        matrix_samples = reshape(hcat(samples...), (length(samples[1]), length(samples), 1)) 
+        mcmc_chain = MCMCChains.Chains(matrix_samples)
         return mcmc_chain, samples, stats
     end
 end
\ No newline at end of file
diff --git a/test/bpinnexperimental.jl b/test/bpinnexperimental.jl
index ffe7fcf0f8..3de049bf58 100644
--- a/test/bpinnexperimental.jl
+++ b/test/bpinnexperimental.jl
@@ -86,7 +86,7 @@ sol = solve(prob_ode_fitzhughnagumo, Tsit5(), saveat = dt)
 sig = 0.20
 data = Array(sol)
 dataset = [data[1,:] .+ (sig .* rand(length(sol.t))), data[2, :] .+ (sig .* rand(length(sol.t))), sol.t]
-priors = [truncated(Normal(0.5,1.0),0,1.5), truncated(Normal(0.5,1.0),0,1.5), truncated(Normal(0.0,0.5),0.0,0.5), truncated(Normal(0.5,1.0),0,1)]
+priors = [Normal(0.5,1.0), Normal(0.5,1.0), Normal(0.0,0.5), Normal(0.5,1.0)]
 
 
 plot(sol.t, dataset[1], label = "noisy x")
@@ -97,22 +97,22 @@ chain = Lux.Chain(Lux.Dense(1, 10, tanh), Lux.Dense(10, 10, tanh),
     Lux.Dense(10, 2))
 
 Adaptorkwargs = (Adaptor = AdvancedHMC.StanHMCAdaptor,
-    Metric = AdvancedHMC.DiagEuclideanMetric, targetacceptancerate = 0.65)
+    Metric = AdvancedHMC.DiagEuclideanMetric, targetacceptancerate = 0.8)
 alg = BNNODE(chain;
 dataset = dataset,
-draw_samples = 10000,
+draw_samples = 1000,
 l2std = [0.1, 0.1],
 phystd = [0.1, 0.1],
 priorsNNw = (0.01, 3.0),
 Adaptorkwargs = Adaptorkwargs,
 param = priors, progress = true)
 
-@time sol_pestim1 = solve(prob_ode_fitzhughnagumo, alg; saveat = dt)
-@time sol_pestim2 = solve(prob_ode_fitzhughnagumo, alg; estim_collocate = true, saveat = dt)
-plot!(sol.t, sol_pestim1.ensemblesol[1], label = "estimated x1")
-plot!(sol.t, sol_pestim2.ensemblesol[1], label = "estimated x2")
-plot!(sol.t, sol_pestim1.ensemblesol[2], label = "estimated y1")
-plot!(sol.t, sol_pestim2.ensemblesol[2], label = "estimated y2")
+@time sol_pestim3 = solve(prob_ode_fitzhughnagumo, alg; saveat = dt)
+@time sol_pestim4 = solve(prob_ode_fitzhughnagumo, alg; estim_collocate = true, saveat = dt)
+plot!(sol.t, sol_pestim3.ensemblesol[1], label = "estimated x1")
+plot!(sol.t, sol_pestim4.ensemblesol[1], label = "estimated x2")
+plot!(sol.t, sol_pestim3.ensemblesol[2], label = "estimated y1")
+plot!(sol.t, sol_pestim4.ensemblesol[2], label = "estimated y2")
 
-@show sol_pestim1.estimated_ode_params
-@show sol_pestim2.estimated_ode_params
\ No newline at end of file
+@show sol_pestim3.estimated_ode_params
+@show sol_pestim4.estimated_ode_params

From f5b4f1cb7e97a06a1b56b39efcfba33734fc74f3 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Sat, 20 Jan 2024 23:50:27 +0530
Subject: [PATCH 005/107] trying to sync

---
 src/BNNODE_new.jl       |  794 +++++++
 src/BPINN_ode.jl        |    1 -
 src/advancedHMC_MCMC.jl |  392 +---
 test/BPINN_Tests.jl     | 4084 +-----------------------------------
 test/BPINN_newform.jl   | 4354 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 5287 insertions(+), 4338 deletions(-)
 create mode 100644 src/BNNODE_new.jl
 create mode 100644 test/BPINN_newform.jl

diff --git a/src/BNNODE_new.jl b/src/BNNODE_new.jl
new file mode 100644
index 0000000000..e6b1f24faa
--- /dev/null
+++ b/src/BNNODE_new.jl
@@ -0,0 +1,794 @@
+mutable struct LogTargetDensity{C, S, ST <: AbstractTrainingStrategy, I,
+    P <: Vector{<:Distribution},
+    D <:
+    Union{Vector{Nothing}, Vector{<:Vector{<:AbstractFloat}}},
+}
+    dim::Int
+    prob::DiffEqBase.ODEProblem
+    chain::C
+    st::S
+    strategy::ST
+    dataset::D
+    priors::P
+    phystd::Vector{Float64}
+    l2std::Vector{Float64}
+    autodiff::Bool
+    physdt::Float64
+    extraparams::Int
+    init_params::I
+
+    function LogTargetDensity(dim, prob, chain::Optimisers.Restructure, st, strategy,
+        dataset,
+        priors, phystd, l2std, autodiff, physdt, extraparams,
+        init_params::AbstractVector)
+        new{
+            typeof(chain),
+            Nothing,
+            typeof(strategy),
+            typeof(init_params),
+            typeof(priors),
+            typeof(dataset),
+        }(dim,
+            prob,
+            chain,
+            nothing, strategy,
+            dataset,
+            priors,
+            phystd,
+            l2std,
+            autodiff,
+            physdt,
+            extraparams,
+            init_params)
+    end
+    function LogTargetDensity(dim, prob, chain::Lux.AbstractExplicitLayer, st, strategy,
+        dataset,
+        priors, phystd, l2std, autodiff, physdt, extraparams,
+        init_params::NamedTuple)
+        new{
+            typeof(chain),
+            typeof(st),
+            typeof(strategy),
+            typeof(init_params),
+            typeof(priors),
+            typeof(dataset),
+        }(dim,
+            prob,
+            chain, st, strategy,
+            dataset, priors,
+            phystd, l2std,
+            autodiff,
+            physdt,
+            extraparams,
+            init_params)
+    end
+end
+
+"""
+cool function to convert parameter's vector to ComponentArray of parameters (for Lux Chain: vector of samples -> Lux ComponentArrays)
+"""
+function vector_to_parameters(ps_new::AbstractVector, ps::NamedTuple)
+    @assert length(ps_new) == Lux.parameterlength(ps)
+    i = 1
+    function get_ps(x)
+        z = reshape(view(ps_new, i:(i + length(x) - 1)), size(x))
+        i += length(x)
+        return z
+    end
+    return Functors.fmap(get_ps, ps)
+end
+
+function LogDensityProblems.logdensity(Tar::LogTargetDensity, θ)
+    return physloglikelihood(Tar, θ) + priorweights(Tar, θ) + L2LossData(Tar, θ)
+    #  +  L2loss2(Tar, θ)
+end
+
+LogDensityProblems.dimension(Tar::LogTargetDensity) = Tar.dim
+
+function LogDensityProblems.capabilities(::LogTargetDensity)
+    LogDensityProblems.LogDensityOrder{1}()
+end
+
+# suggested extra loss function
+function L2loss2(Tar::LogTargetDensity, θ)
+    f = Tar.prob.f
+
+    # parameter estimation chosen or not
+    if Tar.extraparams > 0
+        dataset, deri_sol = Tar.dataset
+        # deri_sol = deri_sol'
+        autodiff = Tar.autodiff
+
+        # # Timepoints to enforce Physics
+        # dataset = Array(reduce(hcat, dataset)')
+        # t = dataset[end, :]
+        # û = dataset[1:(end - 1), :]
+
+        # ode_params = Tar.extraparams == 1 ?
+        #              θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
+        #              θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
+
+        # if length(û[:, 1]) == 1
+        #     physsol = [f(û[:, i][1],
+        #         ode_params,
+        #         t[i])
+        #                for i in 1:length(û[1, :])]
+        # else
+        #     physsol = [f(û[:, i],
+        #         ode_params,
+        #         t[i])
+        #                for i in 1:length(û[1, :])]
+        # end
+        # #form of NN output matrix output dim x n
+        # deri_physsol = reduce(hcat, physsol)
+
+        # > for perfect deriv(basically gradient matching in case of an ODEFunction)
+        # in case of PDE or general ODE we would want to reduce residue of f(du,u,p,t)
+        # if length(û[:, 1]) == 1
+        #     deri_sol = [f(û[:, i][1],
+        #         Tar.prob.p,
+        #         t[i])
+        #                 for i in 1:length(û[1, :])]
+        # else
+        #     deri_sol = [f(û[:, i],
+        #         Tar.prob.p,
+        #         t[i])
+        #                 for i in 1:length(û[1, :])]
+        # end
+        # deri_sol = reduce(hcat, deri_sol) 
+        # deri_sol = reduce(hcat, derivatives)
+
+        # Timepoints to enforce Physics 
+        t = dataset[end]
+        u1 = dataset[2]
+        û = dataset[1]
+        # Tar(t, θ[1:(length(θ) - Tar.extraparams)])'
+        #  
+
+        nnsol = NNodederi(Tar, t, θ[1:(length(θ) - Tar.extraparams)], autodiff)
+
+        ode_params = Tar.extraparams == 1 ?
+                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
+                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
+
+        if length(Tar.prob.u0) == 1
+            physsol = [f(û[i],
+                ode_params,
+                t[i])
+                       for i in 1:length(û[:, 1])]
+        else
+            physsol = [f([û[i], u1[i]],
+                ode_params,
+                t[i])
+                       for i in 1:length(û[:, 1])]
+        end
+        #form of NN output matrix output dim x n 
+        deri_physsol = reduce(hcat, physsol)
+
+        # if length(Tar.prob.u0) == 1
+        #     nnsol = [f(û[i],
+        #         Tar.prob.p,
+        #         t[i])
+        #              for i in 1:length(û[:, 1])]
+        # else
+        #     nnsol = [f([û[i], u1[i]],
+        #         Tar.prob.p,
+        #         t[i])
+        #              for i in 1:length(û[:, 1])]
+        # end
+        # form of NN output matrix output dim x n
+        # nnsol = reduce(hcat, nnsol)
+
+        # > Instead of dataset gradients trying NN derivatives with dataset collocation 
+        # # convert to matrix as nnsol  
+
+        physlogprob = 0
+        for i in 1:length(Tar.prob.u0)
+            # can add phystd[i] for u[i] 
+            physlogprob += logpdf(MvNormal(deri_physsol[i, :],
+                    LinearAlgebra.Diagonal(map(abs2,
+                        (Tar.l2std[i] * 4.0) .*
+                        ones(length(nnsol[i, :]))))),
+                nnsol[i, :])
+        end
+        return physlogprob
+    else
+        return 0
+    end
+end
+
+# PDE(DU,U,P,T)=0
+
+# Derivated via Central Diff
+# function calculate_derivatives2(dataset)
+#     x̂, time = dataset
+#     num_points = length(x̂)
+#     # Initialize an array to store the derivative values.
+#     derivatives = similar(x̂)
+
+#     for i in 2:(num_points - 1)
+#         # Calculate the first-order derivative using central differences.
+#         Δt_forward = time[i + 1] - time[i]
+#         Δt_backward = time[i] - time[i - 1]
+
+#         derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
+
+#         derivatives[i] = derivative
+#     end
+
+#     # Derivatives at the endpoints can be calculated using forward or backward differences.
+#     derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
+#     derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
+#     return derivatives
+# end
+
+function calderivatives(prob, dataset)
+    chainflux = Flux.Chain(Flux.Dense(1, 8, tanh), Flux.Dense(8, 8, tanh),
+        Flux.Dense(8, 2)) |> Flux.f64
+    # chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64
+    function loss(x, y)
+        # sum(Flux.mse.(prob.u0[1] .+ (prob.tspan[2] .- x)' .* chainflux(x)[1, :], y[1]) +
+        #     Flux.mse.(prob.u0[2] .+ (prob.tspan[2] .- x)' .* chainflux(x)[2, :], y[2]))
+        # sum(Flux.mse.(prob.u0[1] .+ (prob.tspan[2] .- x)' .* chainflux(x)[1, :], y[1]))
+        sum(Flux.mse.(chainflux(x), y))
+    end
+    optimizer = Flux.Optimise.ADAM(0.01)
+    epochs = 3000
+    for epoch in 1:epochs
+        Flux.train!(loss,
+            Flux.params(chainflux),
+            [(dataset[end]', dataset[1:(end - 1)])],
+            optimizer)
+    end
+
+    # A1 = (prob.u0' .+
+    #   (prob.tspan[2] .- (dataset[end]' .+ sqrt(eps(eltype(Float64)))))' .*
+    #   chainflux(dataset[end]' .+ sqrt(eps(eltype(Float64))))')
+
+    # A2 = (prob.u0' .+
+    #       (prob.tspan[2] .- (dataset[end]'))' .*
+    #       chainflux(dataset[end]')')
+
+    A1 = chainflux(dataset[end]' .+ sqrt(eps(eltype(dataset[end][1]))))
+    A2 = chainflux(dataset[end]')
+
+    gradients = (A2 .- A1) ./ sqrt(eps(eltype(dataset[end][1])))
+
+    return gradients
+end
+
+function calculate_derivatives(dataset)
+
+    # u = dataset[1]
+    # u1 = dataset[2]
+    # t = dataset[end]
+    # # control points
+    # n = Int(floor(length(t) / 10))
+    # # spline for datasetvalues(solution) 
+    # # interp = BSplineApprox(u, t, 4, 10, :Uniform, :Uniform)
+    # interp = CubicSpline(u, t)
+    # interp1 = CubicSpline(u1, t)
+    # # derrivatives interpolation
+    # dx = t[2] - t[1]
+    # time = collect(t[1]:dx:t[end])
+    # smoothu = [interp(i) for i in time]
+    # smoothu1 = [interp1(i) for i in time]
+    # # derivative of the spline (must match function derivative) 
+    # û = tvdiff(smoothu, 20, 0.5, dx = dx, ε = 1)
+    # û1 = tvdiff(smoothu1, 20, 0.5, dx = dx, ε = 1)
+    # # tvdiff(smoothu, 100, 0.035, dx = dx, ε = 1)
+    # # FDM
+    # # û1 = diff(u) / dx
+    # # dataset[1] and smoothu are almost equal(rounding errors)
+    # return [û, û1] 
+
+end
+
+"""
+L2 loss loglikelihood(needed for ODE parameter estimation)
+"""
+function L2LossData(Tar::LogTargetDensity, θ)
+    dataset = Tar.dataset
+    # check if dataset is provided
+    if dataset isa Vector{Nothing} || Tar.extraparams == 0
+        return 0
+    else
+        # matrix(each row corresponds to vector u's rows)
+        nn = Tar(dataset[end], θ[1:(length(θ) - Tar.extraparams)])
+
+        L2logprob = 0
+        for i in 1:length(Tar.prob.u0)
+            # for u[i] ith vector must be added to dataset,nn[1,:] is the dx in lotka_volterra
+            L2logprob += logpdf(MvNormal(nn[i, :],
+                    LinearAlgebra.Diagonal(map(abs2,
+                        (Tar.l2std[i] * 0.5) .*
+                        ones(length(dataset[i]))))),
+                dataset[i])
+        end
+        return L2logprob
+    end
+end
+
+"""
+physics loglikelihood over problem timespan + dataset timepoints
+"""
+function physloglikelihood(Tar::LogTargetDensity, θ)
+    f = Tar.prob.f
+    p = Tar.prob.p
+    tspan = Tar.prob.tspan
+    autodiff = Tar.autodiff
+    strategy = Tar.strategy
+
+    # parameter estimation chosen or not
+    if Tar.extraparams > 0
+        ode_params = Tar.extraparams == 1 ?
+                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
+                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
+    else
+        ode_params = p == SciMLBase.NullParameters() ? [] : p
+    end
+
+    return getlogpdf(strategy, Tar, f, autodiff, tspan, ode_params, θ)
+end
+
+function getlogpdf(strategy::GridTraining, Tar::LogTargetDensity, f, autodiff::Bool,
+    tspan,
+    ode_params, θ)
+    if Tar.dataset isa Vector{Nothing}
+        t = collect(eltype(strategy.dx), tspan[1]:(strategy.dx):tspan[2])
+    else
+        t = vcat(collect(eltype(strategy.dx), tspan[1]:(strategy.dx):tspan[2]),
+            Tar.dataset[end])
+    end
+
+    sum(innerdiff(Tar, f, autodiff, t, θ,
+        ode_params))
+end
+
+function getlogpdf(strategy::StochasticTraining,
+    Tar::LogTargetDensity,
+    f,
+    autodiff::Bool,
+    tspan,
+    ode_params,
+    θ)
+    if Tar.dataset isa Vector{Nothing}
+        t = [(tspan[2] - tspan[1]) * rand() + tspan[1] for i in 1:(strategy.points)]
+    else
+        t = vcat([(tspan[2] - tspan[1]) * rand() + tspan[1] for i in 1:(strategy.points)],
+            Tar.dataset[end])
+    end
+
+    sum(innerdiff(Tar, f, autodiff, t, θ,
+        ode_params))
+end
+
+function getlogpdf(strategy::QuadratureTraining, Tar::LogTargetDensity, f,
+    autodiff::Bool,
+    tspan,
+    ode_params, θ)
+    function integrand(t::Number, θ)
+        innerdiff(Tar, f, autodiff, [t], θ, ode_params)
+    end
+    intprob = IntegralProblem(integrand, tspan[1], tspan[2], θ; nout = length(Tar.prob.u0))
+    # add dataset logpdf?
+    sol = solve(intprob, QuadGKJL(); abstol = strategy.abstol, reltol = strategy.reltol)
+    sum(sol.u)
+end
+
+function getlogpdf(strategy::WeightedIntervalTraining, Tar::LogTargetDensity, f,
+    autodiff::Bool,
+    tspan,
+    ode_params, θ)
+    minT = tspan[1]
+    maxT = tspan[2]
+
+    weights = strategy.weights ./ sum(strategy.weights)
+
+    N = length(weights)
+    points = strategy.points
+
+    difference = (maxT - minT) / N
+
+    data = Float64[]
+    for (index, item) in enumerate(weights)
+        temp_data = rand(1, trunc(Int, points * item)) .* difference .+ minT .+
+                    ((index - 1) * difference)
+        data = append!(data, temp_data)
+    end
+
+    if Tar.dataset isa Vector{Nothing}
+        t = data
+    else
+        t = vcat(data,
+            Tar.dataset[end])
+    end
+
+    sum(innerdiff(Tar, f, autodiff, t, θ,
+        ode_params))
+end
+
+"""
+MvNormal likelihood at each `ti` in time `t` for ODE collocation residue with NN with parameters θ 
+"""
+function innerdiff(Tar::LogTargetDensity, f, autodiff::Bool, t::AbstractVector, θ,
+    ode_params)
+
+    # Tar used for phi and LogTargetDensity object attributes access
+    out = Tar(t, θ[1:(length(θ) - Tar.extraparams)])
+
+    # # reject samples case(write clear reason why)
+    if any(isinf, out[:, 1]) || any(isinf, ode_params)
+        return -Inf
+    end
+
+    # this is a vector{vector{dx,dy}}(handle case single u(float passed))
+    if length(out[:, 1]) == 1
+        physsol = [f(out[:, i][1],
+            ode_params,
+            t[i])
+                   for i in 1:length(out[1, :])]
+    else
+        physsol = [f(out[:, i],
+            ode_params,
+            t[i])
+                   for i in 1:length(out[1, :])]
+    end
+    physsol = reduce(hcat, physsol)
+
+    nnsol = NNodederi(Tar, t, θ[1:(length(θ) - Tar.extraparams)], autodiff)
+
+    vals = nnsol .- physsol
+
+    # N dimensional vector if N outputs for NN(each row has logpdf of i[i] where u is vector of dependant variables)
+    return [logpdf(MvNormal(vals[i, :],
+            LinearAlgebra.Diagonal(map(abs2,
+                Tar.phystd[i] .*
+                ones(length(vals[i, :]))))),
+        zeros(length(vals[i, :]))) for i in 1:length(Tar.prob.u0)]
+end
+
+"""
+prior logpdf for NN parameters + ODE constants
+"""
+function priorweights(Tar::LogTargetDensity, θ)
+    allparams = Tar.priors
+    # nn weights
+    nnwparams = allparams[1]
+
+    if Tar.extraparams > 0
+        # Vector of ode parameters priors
+        invpriors = allparams[2:end]
+
+        invlogpdf = sum(logpdf(invpriors[length(θ) - i + 1], θ[i])
+                        for i in (length(θ) - Tar.extraparams + 1):length(θ); init = 0.0)
+
+        return (invlogpdf
+                +
+                logpdf(nnwparams, θ[1:(length(θ) - Tar.extraparams)]))
+    else
+        return logpdf(nnwparams, θ)
+    end
+end
+
+function generate_Tar(chain::Lux.AbstractExplicitLayer, init_params)
+    θ, st = Lux.setup(Random.default_rng(), chain)
+    return init_params, chain, st
+end
+
+function generate_Tar(chain::Lux.AbstractExplicitLayer, init_params::Nothing)
+    θ, st = Lux.setup(Random.default_rng(), chain)
+    return θ, chain, st
+end
+
+function generate_Tar(chain::Flux.Chain, init_params)
+    θ, re = Flux.destructure(chain)
+    return init_params, re, nothing
+end
+
+function generate_Tar(chain::Flux.Chain, init_params::Nothing)
+    θ, re = Flux.destructure(chain)
+    # find_good_stepsize,phasepoint takes only float64
+    return θ, re, nothing
+end
+
+"""
+nn OUTPUT AT t,θ ~ phi(t,θ)
+"""
+function (f::LogTargetDensity{C, S})(t::AbstractVector,
+    θ) where {C <: Optimisers.Restructure, S}
+    f.prob.u0 .+ (t' .- f.prob.tspan[1]) .* f.chain(θ)(adapt(parameterless_type(θ), t'))
+end
+
+function (f::LogTargetDensity{C, S})(t::AbstractVector,
+    θ) where {C <: Lux.AbstractExplicitLayer, S}
+    θ = vector_to_parameters(θ, f.init_params)
+    y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), t'), θ, f.st)
+    ChainRulesCore.@ignore_derivatives f.st = st
+    f.prob.u0 .+ (t' .- f.prob.tspan[1]) .* y
+end
+
+function (f::LogTargetDensity{C, S})(t::Number,
+    θ) where {C <: Optimisers.Restructure, S}
+    #  must handle paired odes hence u0 broadcasted
+    f.prob.u0 .+ (t - f.prob.tspan[1]) * f.chain(θ)(adapt(parameterless_type(θ), [t]))
+end
+
+function (f::LogTargetDensity{C, S})(t::Number,
+    θ) where {C <: Lux.AbstractExplicitLayer, S}
+    θ = vector_to_parameters(θ, f.init_params)
+    y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), [t]), θ, f.st)
+    ChainRulesCore.@ignore_derivatives f.st = st
+    f.prob.u0 .+ (t .- f.prob.tspan[1]) .* y
+end
+
+"""
+similar to ode_dfdx() in NNODE/ode_solve.jl
+"""
+function NNodederi(phi::LogTargetDensity, t::AbstractVector, θ, autodiff::Bool)
+    if autodiff
+        hcat(ForwardDiff.derivative.(ti -> phi(ti, θ), t)...)
+    else
+        (phi(t .+ sqrt(eps(eltype(t))), θ) - phi(t, θ)) ./ sqrt(eps(eltype(t)))
+    end
+end
+
+function kernelchoice(Kernel, max_depth, Δ_max, n_leapfrog, δ, λ)
+    if Kernel == HMC
+        Kernel(n_leapfrog)
+    elseif Kernel == HMCDA
+        Kernel(δ, λ)
+    else
+        Kernel(δ, max_depth = max_depth, Δ_max = Δ_max)
+    end
+end
+
+function integratorchoice(Integrator, initial_ϵ, jitter_rate,
+    tempering_rate)
+    if Integrator == JitteredLeapfrog
+        Integrator(initial_ϵ, jitter_rate)
+    elseif Integrator == TemperedLeapfrog
+        Integrator(initial_ϵ, tempering_rate)
+    else
+        Integrator(initial_ϵ)
+    end
+end
+
+function adaptorchoice(Adaptor, mma, ssa)
+    if Adaptor != AdvancedHMC.NoAdaptation()
+        Adaptor(mma, ssa)
+    else
+        AdvancedHMC.NoAdaptation()
+    end
+end
+
+"""
+```julia
+ahmc_bayesian_pinn_ode(prob, chain; strategy = GridTraining,
+                    dataset = [nothing],init_params = nothing, 
+                    draw_samples = 1000, physdt = 1 / 20.0f0,l2std = [0.05],
+                    phystd = [0.05], priorsNNw = (0.0, 2.0),
+                    param = [],nchains = 1,autodiff = false, Kernel = HMC,
+                    Integrator = Leapfrog, Adaptor = StanHMCAdaptor,
+                    targetacceptancerate = 0.8, Metric = DiagEuclideanMetric,
+                    jitter_rate = 3.0, tempering_rate = 3.0, max_depth = 10,
+                    Δ_max = 1000, n_leapfrog = 10, δ = 0.65, λ = 0.3,
+                    progress = false,verbose = false)
+```
+!!! warn
+
+    Note that ahmc_bayesian_pinn_ode() only supports ODEs which are written in the out-of-place form, i.e.
+    `du = f(u,p,t)`, and not `f(du,u,p,t)`. If not declared out-of-place, then the ahmc_bayesian_pinn_ode()
+    will exit with an error.
+
+## Example
+linear = (u, p, t) -> -u / p[1] + exp(t / p[2]) * cos(t)
+tspan = (0.0, 10.0)
+u0 = 0.0
+p = [5.0, -5.0]
+prob = ODEProblem(linear, u0, tspan, p)
+
+# CREATE DATASET (Necessity for accurate Parameter estimation)
+sol = solve(prob, Tsit5(); saveat = 0.05)
+u = sol.u[1:100]
+time = sol.t[1:100]
+
+# dataset and BPINN create
+x̂ = collect(Float64, Array(u) + 0.05 * randn(size(u)))
+dataset = [x̂, time]
+
+chainflux1 = Flux.Chain(Flux.Dense(1, 5, tanh), Flux.Dense(5, 5, tanh), Flux.Dense(5, 1)
+
+# simply solving ode here hence better to not pass dataset(uses ode params specified in prob)
+fh_mcmc_chainflux1, fhsamplesflux1, fhstatsflux1 = ahmc_bayesian_pinn_ode(prob,chainflux1,
+                                                                          dataset = dataset,
+                                                                          draw_samples = 1500,
+                                                                          l2std = [0.05],
+                                                                          phystd = [0.05],
+                                                                          priorsNNw = (0.0,3.0))
+
+# solving ode + estimating parameters hence dataset needed to optimize parameters upon + Pior Distributions for ODE params
+fh_mcmc_chainflux2, fhsamplesflux2, fhstatsflux2 = ahmc_bayesian_pinn_ode(prob,chainflux1,
+                                                                          dataset = dataset,
+                                                                          draw_samples = 1500,
+                                                                          l2std = [0.05],
+                                                                          phystd = [0.05],
+                                                                          priorsNNw = (0.0,3.0),
+                                                                          param = [Normal(6.5,0.5),Normal(-3,0.5)])
+
+## NOTES 
+Dataset is required for accurate Parameter estimation + solving equations
+Incase you are only solving the Equations for solution, do not provide dataset
+
+## Positional Arguments
+* `prob`: DEProblem(out of place and the function signature should be f(u,p,t)
+* `chain`: Lux/Flux Neural Netork which would be made the Bayesian PINN
+
+## Keyword Arguments
+* `strategy`: The training strategy used to choose the points for the evaluations. By default GridTraining is used with given physdt discretization.
+* `dataset`: Vector containing Vectors of corresponding u,t values 
+* `init_params`: intial parameter values for BPINN (ideally for multiple chains different initializations preferred)
+* `nchains`: number of chains you want to sample (random initialisation of params by default)
+* `draw_samples`: number of samples to be drawn in the MCMC algorithms (warmup samples are ~2/3 of draw samples)
+* `l2std`: standard deviation of BPINN predicition against L2 losses/Dataset
+* `phystd`: standard deviation of BPINN predicition against Chosen Underlying ODE System
+* `priorsNNw`: Vector of [mean, std] for BPINN parameter. Weights and Biases of BPINN are Normal Distributions by default
+* `param`: Vector of chosen ODE parameters Distributions in case of Inverse problems.
+* `autodiff`: Boolean Value for choice of Derivative Backend(default is numerical)
+* `physdt`: Timestep for approximating ODE in it's Time domain. (1/20.0 by default)
+
+# AHMC.jl is still developing convenience structs so might need changes on new releases.
+* `Kernel`: Choice of MCMC Sampling Algorithm (AdvancedHMC.jl implemenations HMC/NUTS/HMCDA)
+* `targetacceptancerate`: Target percentage(in decimal) of iterations in which the proposals were accepted(0.8 by default)
+* `Integrator(jitter_rate, tempering_rate), Metric, Adaptor`: https://turinglang.org/AdvancedHMC.jl/stable/
+* `max_depth`: Maximum doubling tree depth (NUTS)
+* `Δ_max`: Maximum divergence during doubling tree (NUTS)
+* `n_leapfrog`: number of leapfrog steps for HMC
+* `δ`: target acceptance probability for NUTS/HMCDA
+* `λ`: target trajectory length for HMCDA
+* `progress`: controls whether to show the progress meter or not.
+* `verbose`: controls the verbosity. (Sample call args in AHMC)
+
+"""
+
+"""
+dataset would be (x̂,t)
+priors: pdf for W,b + pdf for ODE params
+"""
+function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain;
+    strategy = GridTraining, dataset = [nothing],
+    init_params = nothing, draw_samples = 1000,
+    physdt = 1 / 20.0, l2std = [0.05],
+    phystd = [0.05], priorsNNw = (0.0, 2.0),
+    param = [], nchains = 1, autodiff = false,
+    Kernel = HMC, Integrator = Leapfrog,
+    Adaptor = StanHMCAdaptor, targetacceptancerate = 0.8,
+    Metric = DiagEuclideanMetric, jitter_rate = 3.0,
+    tempering_rate = 3.0, max_depth = 10, Δ_max = 1000,
+    n_leapfrog = 10, δ = 0.65, λ = 0.3, progress = false,
+    verbose = false)
+
+    # NN parameter prior mean and variance(PriorsNN must be a tuple)
+    if isinplace(prob)
+        throw(error("The BPINN ODE solver only supports out-of-place ODE definitions, i.e. du=f(u,p,t)."))
+    end
+
+    strategy = strategy == GridTraining ? strategy(physdt) : strategy
+
+    if dataset != [nothing] &&
+       (length(dataset) < 2 || !(typeof(dataset) <: Vector{<:Vector{<:AbstractFloat}}))
+        throw(error("Invalid dataset. dataset would be timeseries (x̂,t) where type: Vector{Vector{AbstractFloat}"))
+    end
+
+    if dataset != [nothing] && param == []
+        println("Dataset is only needed for Parameter Estimation + Forward Problem, not in only Forward Problem case.")
+    elseif dataset == [nothing] && param != []
+        throw(error("Dataset Required for Parameter Estimation."))
+    end
+
+    if chain isa Lux.AbstractExplicitLayer || chain isa Flux.Chain
+        # Flux-vector, Lux-Named Tuple
+        initial_nnθ, recon, st = generate_Tar(chain, init_params)
+    else
+        error("Only Lux.AbstractExplicitLayer and Flux.Chain neural networks are supported")
+    end
+
+    if nchains > Threads.nthreads()
+        throw(error("number of chains is greater than available threads"))
+    elseif nchains < 1
+        throw(error("number of chains must be greater than 1"))
+    end
+
+    # eltype(physdt) cause needs Float64 for find_good_stepsize
+    if chain isa Lux.AbstractExplicitLayer
+        # Lux chain(using component array later as vector_to_parameter need namedtuple)
+        initial_θ = collect(eltype(physdt),
+            vcat(ComponentArrays.ComponentArray(initial_nnθ)))
+    else
+        initial_θ = collect(eltype(physdt), initial_nnθ)
+    end
+
+    # adding ode parameter estimation
+    nparameters = length(initial_θ)
+    ninv = length(param)
+    priors = [
+        MvNormal(priorsNNw[1] * ones(nparameters),
+            LinearAlgebra.Diagonal(map(abs2, priorsNNw[2] .* ones(nparameters)))),
+    ]
+
+    # append Ode params to all paramvector
+    if ninv > 0
+        # shift ode params(initialise ode params by prior means)
+        initial_θ = vcat(initial_θ, [Distributions.params(param[i])[1] for i in 1:ninv])
+        priors = vcat(priors, param)
+        nparameters += ninv
+    end
+
+    t0 = prob.tspan[1]
+    # dimensions would be total no of params,initial_nnθ for Lux namedTuples
+    ℓπ = LogTargetDensity(nparameters, prob, recon, st, strategy, dataset, priors,
+        phystd, l2std, autodiff, physdt, ninv, initial_nnθ)
+
+    try
+        ℓπ(t0, initial_θ[1:(nparameters - ninv)])
+    catch err
+        if isa(err, DimensionMismatch)
+            throw(DimensionMismatch("Dimensions of the initial u0 and chain should match"))
+        else
+            throw(err)
+        end
+    end
+
+    # Define Hamiltonian system (nparameters ~ dimensionality of the sampling space)
+    metric = Metric(nparameters)
+    hamiltonian = Hamiltonian(metric, ℓπ, ForwardDiff)
+
+    println("physics Logpdf is : ", physloglikelihood(ℓπ, initial_θ))
+    println("prior Logpdf is : ", priorweights(ℓπ, initial_θ))
+    println("L2lossData Logpdf is : ", L2LossData(ℓπ, initial_θ))
+    println("L2loss2 Logpdf is : ", L2loss2(ℓπ, initial_θ))
+
+    # parallel sampling option
+    if nchains != 1
+        # Cache to store the chains
+        chains = Vector{Any}(undef, nchains)
+        statsc = Vector{Any}(undef, nchains)
+        samplesc = Vector{Any}(undef, nchains)
+
+        Threads.@threads for i in 1:nchains
+            # each chain has different initial NNparameter values(better posterior exploration)
+            initial_θ = vcat(randn(nparameters - ninv),
+                initial_θ[(nparameters - ninv + 1):end])
+            initial_ϵ = find_good_stepsize(hamiltonian, initial_θ)
+            integrator = integratorchoice(Integrator, initial_ϵ, jitter_rate,
+                tempering_rate)
+            adaptor = adaptorchoice(Adaptor, MassMatrixAdaptor(metric),
+                StepSizeAdaptor(targetacceptancerate, integrator))
+            Kernel = AdvancedHMC.make_kernel(kernelchoice(Kernel, max_depth, Δ_max,
+                    n_leapfrog, δ, λ), integrator)
+            samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples, adaptor;
+                progress = progress, verbose = verbose)
+
+            samplesc[i] = samples
+            statsc[i] = stats
+            mcmc_chain = Chains(hcat(samples...)')
+            chains[i] = mcmc_chain
+        end
+
+        return chains, samplesc, statsc
+    else
+        initial_ϵ = find_good_stepsize(hamiltonian, initial_θ)
+        integrator = integratorchoice(Integrator, initial_ϵ, jitter_rate, tempering_rate)
+        adaptor = adaptorchoice(Adaptor, MassMatrixAdaptor(metric),
+            StepSizeAdaptor(targetacceptancerate, integrator))
+        Kernel = AdvancedHMC.make_kernel(kernelchoice(Kernel, max_depth, Δ_max, n_leapfrog,
+                δ, λ), integrator)
+        samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples,
+            adaptor; progress = progress, verbose = verbose)
+
+        # return a chain(basic chain),samples and stats
+        matrix_samples = hcat(samples...)
+        mcmc_chain = MCMCChains.Chains(matrix_samples')
+        return mcmc_chain, samples, stats
+    end
+end
\ No newline at end of file
diff --git a/src/BPINN_ode.jl b/src/BPINN_ode.jl
index f9a68b8917..f79f5208f2 100644
--- a/src/BPINN_ode.jl
+++ b/src/BPINN_ode.jl
@@ -284,7 +284,6 @@ function DiffEqBase.__solve(prob::DiffEqBase.ODEProblem,
         push!(ensemblecurves, ensemblecurve)
     end
 
-    # estimated using all samples
     nnparams = length(θinit)
     estimnnparams = [Particles(reduce(hcat, samples)[i, :]) for i in 1:nnparams]
 
diff --git a/src/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl
index e6b1f24faa..6fee4a818e 100644
--- a/src/advancedHMC_MCMC.jl
+++ b/src/advancedHMC_MCMC.jl
@@ -18,9 +18,9 @@ mutable struct LogTargetDensity{C, S, ST <: AbstractTrainingStrategy, I,
     init_params::I
 
     function LogTargetDensity(dim, prob, chain::Optimisers.Restructure, st, strategy,
-        dataset,
-        priors, phystd, l2std, autodiff, physdt, extraparams,
-        init_params::AbstractVector)
+            dataset,
+            priors, phystd, l2std, autodiff, physdt, extraparams,
+            init_params::AbstractVector)
         new{
             typeof(chain),
             Nothing,
@@ -42,9 +42,9 @@ mutable struct LogTargetDensity{C, S, ST <: AbstractTrainingStrategy, I,
             init_params)
     end
     function LogTargetDensity(dim, prob, chain::Lux.AbstractExplicitLayer, st, strategy,
-        dataset,
-        priors, phystd, l2std, autodiff, physdt, extraparams,
-        init_params::NamedTuple)
+            dataset,
+            priors, phystd, l2std, autodiff, physdt, extraparams,
+            init_params::NamedTuple)
         new{
             typeof(chain),
             typeof(st),
@@ -65,9 +65,11 @@ mutable struct LogTargetDensity{C, S, ST <: AbstractTrainingStrategy, I,
 end
 
 """
-cool function to convert parameter's vector to ComponentArray of parameters (for Lux Chain: vector of samples -> Lux ComponentArrays)
+function needed for converting vector of sampled parameters into ComponentVector in case of Lux chain output, derivatives
+the sampled parameters are of exotic type `Dual` due to ForwardDiff's autodiff tagging
 """
-function vector_to_parameters(ps_new::AbstractVector, ps::NamedTuple)
+function vector_to_parameters(ps_new::AbstractVector,
+        ps::Union{NamedTuple, ComponentArrays.ComponentVector})
     @assert length(ps_new) == Lux.parameterlength(ps)
     i = 1
     function get_ps(x)
@@ -78,9 +80,10 @@ function vector_to_parameters(ps_new::AbstractVector, ps::NamedTuple)
     return Functors.fmap(get_ps, ps)
 end
 
+vector_to_parameters(ps_new::AbstractVector, ps::AbstractVector) = ps_new
+
 function LogDensityProblems.logdensity(Tar::LogTargetDensity, θ)
     return physloglikelihood(Tar, θ) + priorweights(Tar, θ) + L2LossData(Tar, θ)
-    #  +  L2loss2(Tar, θ)
 end
 
 LogDensityProblems.dimension(Tar::LogTargetDensity) = Tar.dim
@@ -89,221 +92,24 @@ function LogDensityProblems.capabilities(::LogTargetDensity)
     LogDensityProblems.LogDensityOrder{1}()
 end
 
-# suggested extra loss function
-function L2loss2(Tar::LogTargetDensity, θ)
-    f = Tar.prob.f
-
-    # parameter estimation chosen or not
-    if Tar.extraparams > 0
-        dataset, deri_sol = Tar.dataset
-        # deri_sol = deri_sol'
-        autodiff = Tar.autodiff
-
-        # # Timepoints to enforce Physics
-        # dataset = Array(reduce(hcat, dataset)')
-        # t = dataset[end, :]
-        # û = dataset[1:(end - 1), :]
-
-        # ode_params = Tar.extraparams == 1 ?
-        #              θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
-        #              θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
-
-        # if length(û[:, 1]) == 1
-        #     physsol = [f(û[:, i][1],
-        #         ode_params,
-        #         t[i])
-        #                for i in 1:length(û[1, :])]
-        # else
-        #     physsol = [f(û[:, i],
-        #         ode_params,
-        #         t[i])
-        #                for i in 1:length(û[1, :])]
-        # end
-        # #form of NN output matrix output dim x n
-        # deri_physsol = reduce(hcat, physsol)
-
-        # > for perfect deriv(basically gradient matching in case of an ODEFunction)
-        # in case of PDE or general ODE we would want to reduce residue of f(du,u,p,t)
-        # if length(û[:, 1]) == 1
-        #     deri_sol = [f(û[:, i][1],
-        #         Tar.prob.p,
-        #         t[i])
-        #                 for i in 1:length(û[1, :])]
-        # else
-        #     deri_sol = [f(û[:, i],
-        #         Tar.prob.p,
-        #         t[i])
-        #                 for i in 1:length(û[1, :])]
-        # end
-        # deri_sol = reduce(hcat, deri_sol) 
-        # deri_sol = reduce(hcat, derivatives)
-
-        # Timepoints to enforce Physics 
-        t = dataset[end]
-        u1 = dataset[2]
-        û = dataset[1]
-        # Tar(t, θ[1:(length(θ) - Tar.extraparams)])'
-        #  
-
-        nnsol = NNodederi(Tar, t, θ[1:(length(θ) - Tar.extraparams)], autodiff)
-
-        ode_params = Tar.extraparams == 1 ?
-                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
-                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
-
-        if length(Tar.prob.u0) == 1
-            physsol = [f(û[i],
-                ode_params,
-                t[i])
-                       for i in 1:length(û[:, 1])]
-        else
-            physsol = [f([û[i], u1[i]],
-                ode_params,
-                t[i])
-                       for i in 1:length(û[:, 1])]
-        end
-        #form of NN output matrix output dim x n 
-        deri_physsol = reduce(hcat, physsol)
-
-        # if length(Tar.prob.u0) == 1
-        #     nnsol = [f(û[i],
-        #         Tar.prob.p,
-        #         t[i])
-        #              for i in 1:length(û[:, 1])]
-        # else
-        #     nnsol = [f([û[i], u1[i]],
-        #         Tar.prob.p,
-        #         t[i])
-        #              for i in 1:length(û[:, 1])]
-        # end
-        # form of NN output matrix output dim x n
-        # nnsol = reduce(hcat, nnsol)
-
-        # > Instead of dataset gradients trying NN derivatives with dataset collocation 
-        # # convert to matrix as nnsol  
-
-        physlogprob = 0
-        for i in 1:length(Tar.prob.u0)
-            # can add phystd[i] for u[i] 
-            physlogprob += logpdf(MvNormal(deri_physsol[i, :],
-                    LinearAlgebra.Diagonal(map(abs2,
-                        (Tar.l2std[i] * 4.0) .*
-                        ones(length(nnsol[i, :]))))),
-                nnsol[i, :])
-        end
-        return physlogprob
-    else
-        return 0
-    end
-end
-
-# PDE(DU,U,P,T)=0
-
-# Derivated via Central Diff
-# function calculate_derivatives2(dataset)
-#     x̂, time = dataset
-#     num_points = length(x̂)
-#     # Initialize an array to store the derivative values.
-#     derivatives = similar(x̂)
-
-#     for i in 2:(num_points - 1)
-#         # Calculate the first-order derivative using central differences.
-#         Δt_forward = time[i + 1] - time[i]
-#         Δt_backward = time[i] - time[i - 1]
-
-#         derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
-
-#         derivatives[i] = derivative
-#     end
-
-#     # Derivatives at the endpoints can be calculated using forward or backward differences.
-#     derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
-#     derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
-#     return derivatives
-# end
-
-function calderivatives(prob, dataset)
-    chainflux = Flux.Chain(Flux.Dense(1, 8, tanh), Flux.Dense(8, 8, tanh),
-        Flux.Dense(8, 2)) |> Flux.f64
-    # chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64
-    function loss(x, y)
-        # sum(Flux.mse.(prob.u0[1] .+ (prob.tspan[2] .- x)' .* chainflux(x)[1, :], y[1]) +
-        #     Flux.mse.(prob.u0[2] .+ (prob.tspan[2] .- x)' .* chainflux(x)[2, :], y[2]))
-        # sum(Flux.mse.(prob.u0[1] .+ (prob.tspan[2] .- x)' .* chainflux(x)[1, :], y[1]))
-        sum(Flux.mse.(chainflux(x), y))
-    end
-    optimizer = Flux.Optimise.ADAM(0.01)
-    epochs = 3000
-    for epoch in 1:epochs
-        Flux.train!(loss,
-            Flux.params(chainflux),
-            [(dataset[end]', dataset[1:(end - 1)])],
-            optimizer)
-    end
-
-    # A1 = (prob.u0' .+
-    #   (prob.tspan[2] .- (dataset[end]' .+ sqrt(eps(eltype(Float64)))))' .*
-    #   chainflux(dataset[end]' .+ sqrt(eps(eltype(Float64))))')
-
-    # A2 = (prob.u0' .+
-    #       (prob.tspan[2] .- (dataset[end]'))' .*
-    #       chainflux(dataset[end]')')
-
-    A1 = chainflux(dataset[end]' .+ sqrt(eps(eltype(dataset[end][1]))))
-    A2 = chainflux(dataset[end]')
-
-    gradients = (A2 .- A1) ./ sqrt(eps(eltype(dataset[end][1])))
-
-    return gradients
-end
-
-function calculate_derivatives(dataset)
-
-    # u = dataset[1]
-    # u1 = dataset[2]
-    # t = dataset[end]
-    # # control points
-    # n = Int(floor(length(t) / 10))
-    # # spline for datasetvalues(solution) 
-    # # interp = BSplineApprox(u, t, 4, 10, :Uniform, :Uniform)
-    # interp = CubicSpline(u, t)
-    # interp1 = CubicSpline(u1, t)
-    # # derrivatives interpolation
-    # dx = t[2] - t[1]
-    # time = collect(t[1]:dx:t[end])
-    # smoothu = [interp(i) for i in time]
-    # smoothu1 = [interp1(i) for i in time]
-    # # derivative of the spline (must match function derivative) 
-    # û = tvdiff(smoothu, 20, 0.5, dx = dx, ε = 1)
-    # û1 = tvdiff(smoothu1, 20, 0.5, dx = dx, ε = 1)
-    # # tvdiff(smoothu, 100, 0.035, dx = dx, ε = 1)
-    # # FDM
-    # # û1 = diff(u) / dx
-    # # dataset[1] and smoothu are almost equal(rounding errors)
-    # return [û, û1] 
-
-end
-
 """
 L2 loss loglikelihood(needed for ODE parameter estimation)
 """
 function L2LossData(Tar::LogTargetDensity, θ)
-    dataset = Tar.dataset
     # check if dataset is provided
-    if dataset isa Vector{Nothing} || Tar.extraparams == 0
+    if Tar.dataset isa Vector{Nothing} || Tar.extraparams == 0
         return 0
     else
         # matrix(each row corresponds to vector u's rows)
-        nn = Tar(dataset[end], θ[1:(length(θ) - Tar.extraparams)])
+        nn = Tar(Tar.dataset[end], θ[1:(length(θ) - Tar.extraparams)])
 
         L2logprob = 0
         for i in 1:length(Tar.prob.u0)
             # for u[i] ith vector must be added to dataset,nn[1,:] is the dx in lotka_volterra
             L2logprob += logpdf(MvNormal(nn[i, :],
-                    LinearAlgebra.Diagonal(map(abs2,
-                        (Tar.l2std[i] * 0.5) .*
-                        ones(length(dataset[i]))))),
-                dataset[i])
+                    LinearAlgebra.Diagonal(abs2.(Tar.l2std[i] .*
+                                                 ones(length(Tar.dataset[i]))))),
+                Tar.dataset[i])
         end
         return L2logprob
     end
@@ -332,8 +138,8 @@ function physloglikelihood(Tar::LogTargetDensity, θ)
 end
 
 function getlogpdf(strategy::GridTraining, Tar::LogTargetDensity, f, autodiff::Bool,
-    tspan,
-    ode_params, θ)
+        tspan,
+        ode_params, θ)
     if Tar.dataset isa Vector{Nothing}
         t = collect(eltype(strategy.dx), tspan[1]:(strategy.dx):tspan[2])
     else
@@ -346,12 +152,12 @@ function getlogpdf(strategy::GridTraining, Tar::LogTargetDensity, f, autodiff::B
 end
 
 function getlogpdf(strategy::StochasticTraining,
-    Tar::LogTargetDensity,
-    f,
-    autodiff::Bool,
-    tspan,
-    ode_params,
-    θ)
+        Tar::LogTargetDensity,
+        f,
+        autodiff::Bool,
+        tspan,
+        ode_params,
+        θ)
     if Tar.dataset isa Vector{Nothing}
         t = [(tspan[2] - tspan[1]) * rand() + tspan[1] for i in 1:(strategy.points)]
     else
@@ -364,22 +170,21 @@ function getlogpdf(strategy::StochasticTraining,
 end
 
 function getlogpdf(strategy::QuadratureTraining, Tar::LogTargetDensity, f,
-    autodiff::Bool,
-    tspan,
-    ode_params, θ)
+        autodiff::Bool,
+        tspan,
+        ode_params, θ)
     function integrand(t::Number, θ)
         innerdiff(Tar, f, autodiff, [t], θ, ode_params)
     end
     intprob = IntegralProblem(integrand, tspan[1], tspan[2], θ; nout = length(Tar.prob.u0))
-    # add dataset logpdf?
     sol = solve(intprob, QuadGKJL(); abstol = strategy.abstol, reltol = strategy.reltol)
     sum(sol.u)
 end
 
 function getlogpdf(strategy::WeightedIntervalTraining, Tar::LogTargetDensity, f,
-    autodiff::Bool,
-    tspan,
-    ode_params, θ)
+        autodiff::Bool,
+        tspan,
+        ode_params, θ)
     minT = tspan[1]
     maxT = tspan[2]
 
@@ -412,7 +217,7 @@ end
 MvNormal likelihood at each `ti` in time `t` for ODE collocation residue with NN with parameters θ 
 """
 function innerdiff(Tar::LogTargetDensity, f, autodiff::Bool, t::AbstractVector, θ,
-    ode_params)
+        ode_params)
 
     # Tar used for phi and LogTargetDensity object attributes access
     out = Tar(t, θ[1:(length(θ) - Tar.extraparams)])
@@ -442,9 +247,8 @@ function innerdiff(Tar::LogTargetDensity, f, autodiff::Bool, t::AbstractVector,
 
     # N dimensional vector if N outputs for NN(each row has logpdf of i[i] where u is vector of dependant variables)
     return [logpdf(MvNormal(vals[i, :],
-            LinearAlgebra.Diagonal(map(abs2,
-                Tar.phystd[i] .*
-                ones(length(vals[i, :]))))),
+            LinearAlgebra.Diagonal(abs2.(Tar.phystd[i] .*
+                                         ones(length(vals[i, :]))))),
         zeros(length(vals[i, :]))) for i in 1:length(Tar.prob.u0)]
 end
 
@@ -496,12 +300,12 @@ end
 nn OUTPUT AT t,θ ~ phi(t,θ)
 """
 function (f::LogTargetDensity{C, S})(t::AbstractVector,
-    θ) where {C <: Optimisers.Restructure, S}
+        θ) where {C <: Optimisers.Restructure, S}
     f.prob.u0 .+ (t' .- f.prob.tspan[1]) .* f.chain(θ)(adapt(parameterless_type(θ), t'))
 end
 
 function (f::LogTargetDensity{C, S})(t::AbstractVector,
-    θ) where {C <: Lux.AbstractExplicitLayer, S}
+        θ) where {C <: Lux.AbstractExplicitLayer, S}
     θ = vector_to_parameters(θ, f.init_params)
     y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), t'), θ, f.st)
     ChainRulesCore.@ignore_derivatives f.st = st
@@ -509,13 +313,13 @@ function (f::LogTargetDensity{C, S})(t::AbstractVector,
 end
 
 function (f::LogTargetDensity{C, S})(t::Number,
-    θ) where {C <: Optimisers.Restructure, S}
+        θ) where {C <: Optimisers.Restructure, S}
     #  must handle paired odes hence u0 broadcasted
     f.prob.u0 .+ (t - f.prob.tspan[1]) * f.chain(θ)(adapt(parameterless_type(θ), [t]))
 end
 
 function (f::LogTargetDensity{C, S})(t::Number,
-    θ) where {C <: Lux.AbstractExplicitLayer, S}
+        θ) where {C <: Lux.AbstractExplicitLayer, S}
     θ = vector_to_parameters(θ, f.init_params)
     y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), [t]), θ, f.st)
     ChainRulesCore.@ignore_derivatives f.st = st
@@ -533,21 +337,27 @@ function NNodederi(phi::LogTargetDensity, t::AbstractVector, θ, autodiff::Bool)
     end
 end
 
-function kernelchoice(Kernel, max_depth, Δ_max, n_leapfrog, δ, λ)
-    if Kernel == HMC
-        Kernel(n_leapfrog)
-    elseif Kernel == HMCDA
+function kernelchoice(Kernel, MCMCkwargs)
+    if Kernel == HMCDA
+        δ, λ = MCMCkwargs[:δ], MCMCkwargs[:λ]
         Kernel(δ, λ)
-    else
+    elseif Kernel == NUTS
+        δ, max_depth, Δ_max = MCMCkwargs[:δ], MCMCkwargs[:max_depth], MCMCkwargs[:Δ_max]
         Kernel(δ, max_depth = max_depth, Δ_max = Δ_max)
+    else
+        # HMC
+        n_leapfrog = MCMCkwargs[:n_leapfrog]
+        Kernel(n_leapfrog)
     end
 end
 
-function integratorchoice(Integrator, initial_ϵ, jitter_rate,
-    tempering_rate)
+function integratorchoice(Integratorkwargs, initial_ϵ)
+    Integrator = Integratorkwargs[:Integrator]
     if Integrator == JitteredLeapfrog
+        jitter_rate = Integratorkwargs[:jitter_rate]
         Integrator(initial_ϵ, jitter_rate)
     elseif Integrator == TemperedLeapfrog
+        tempering_rate = Integratorkwargs[:tempering_rate]
         Integrator(initial_ϵ, tempering_rate)
     else
         Integrator(initial_ϵ)
@@ -568,12 +378,12 @@ ahmc_bayesian_pinn_ode(prob, chain; strategy = GridTraining,
                     dataset = [nothing],init_params = nothing, 
                     draw_samples = 1000, physdt = 1 / 20.0f0,l2std = [0.05],
                     phystd = [0.05], priorsNNw = (0.0, 2.0),
-                    param = [],nchains = 1,autodiff = false, Kernel = HMC,
-                    Integrator = Leapfrog, Adaptor = StanHMCAdaptor,
-                    targetacceptancerate = 0.8, Metric = DiagEuclideanMetric,
-                    jitter_rate = 3.0, tempering_rate = 3.0, max_depth = 10,
-                    Δ_max = 1000, n_leapfrog = 10, δ = 0.65, λ = 0.3,
-                    progress = false,verbose = false)
+                    param = [], nchains = 1, autodiff = false, Kernel = HMC,
+                    Adaptorkwargs = (Adaptor = StanHMCAdaptor,
+                        Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
+                    Integratorkwargs = (Integrator = Leapfrog,),
+                    MCMCkwargs = (n_leapfrog = 30,),
+                    progress = false, verbose = false)
 ```
 !!! warn
 
@@ -626,47 +436,48 @@ Incase you are only solving the Equations for solution, do not provide dataset
 
 ## Keyword Arguments
 * `strategy`: The training strategy used to choose the points for the evaluations. By default GridTraining is used with given physdt discretization.
-* `dataset`: Vector containing Vectors of corresponding u,t values 
 * `init_params`: intial parameter values for BPINN (ideally for multiple chains different initializations preferred)
-* `nchains`: number of chains you want to sample (random initialisation of params by default)
+* `nchains`: number of chains you want to sample
 * `draw_samples`: number of samples to be drawn in the MCMC algorithms (warmup samples are ~2/3 of draw samples)
-* `l2std`: standard deviation of BPINN predicition against L2 losses/Dataset
-* `phystd`: standard deviation of BPINN predicition against Chosen Underlying ODE System
-* `priorsNNw`: Vector of [mean, std] for BPINN parameter. Weights and Biases of BPINN are Normal Distributions by default
+* `l2std`: standard deviation of BPINN prediction against L2 losses/Dataset
+* `phystd`: standard deviation of BPINN prediction against Chosen Underlying ODE System
+* `priorsNNw`: Tuple of (mean, std) for BPINN Network parameters. Weights and Biases of BPINN are Normal Distributions by default.
 * `param`: Vector of chosen ODE parameters Distributions in case of Inverse problems.
 * `autodiff`: Boolean Value for choice of Derivative Backend(default is numerical)
 * `physdt`: Timestep for approximating ODE in it's Time domain. (1/20.0 by default)
 
-# AHMC.jl is still developing convenience structs so might need changes on new releases.
+# AdvancedHMC.jl is still developing convenience structs so might need changes on new releases.
 * `Kernel`: Choice of MCMC Sampling Algorithm (AdvancedHMC.jl implemenations HMC/NUTS/HMCDA)
-* `targetacceptancerate`: Target percentage(in decimal) of iterations in which the proposals were accepted(0.8 by default)
-* `Integrator(jitter_rate, tempering_rate), Metric, Adaptor`: https://turinglang.org/AdvancedHMC.jl/stable/
-* `max_depth`: Maximum doubling tree depth (NUTS)
-* `Δ_max`: Maximum divergence during doubling tree (NUTS)
-* `n_leapfrog`: number of leapfrog steps for HMC
-* `δ`: target acceptance probability for NUTS/HMCDA
-* `λ`: target trajectory length for HMCDA
+* `Integratorkwargs`: `Integrator`, `jitter_rate`, `tempering_rate`. Refer: https://turinglang.org/AdvancedHMC.jl/stable/
+* `Adaptorkwargs`: `Adaptor`, `Metric`, `targetacceptancerate`. Refer: https://turinglang.org/AdvancedHMC.jl/stable/
+    Note: Target percentage(in decimal) of iterations in which the proposals are accepted (0.8 by default)
+* `MCMCargs`: A NamedTuple containing all the chosen MCMC kernel's(HMC/NUTS/HMCDA) Arguments, as follows :
+    * `n_leapfrog`: number of leapfrog steps for HMC
+    * `δ`: target acceptance probability for NUTS and HMCDA
+    * `λ`: target trajectory length for HMCDA
+    * `max_depth`: Maximum doubling tree depth (NUTS)
+    * `Δ_max`: Maximum divergence during doubling tree (NUTS)
+    Refer: https://turinglang.org/AdvancedHMC.jl/stable/
 * `progress`: controls whether to show the progress meter or not.
 * `verbose`: controls the verbosity. (Sample call args in AHMC)
 
 """
 
 """
-dataset would be (x̂,t)
 priors: pdf for W,b + pdf for ODE params
 """
 function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain;
-    strategy = GridTraining, dataset = [nothing],
-    init_params = nothing, draw_samples = 1000,
-    physdt = 1 / 20.0, l2std = [0.05],
-    phystd = [0.05], priorsNNw = (0.0, 2.0),
-    param = [], nchains = 1, autodiff = false,
-    Kernel = HMC, Integrator = Leapfrog,
-    Adaptor = StanHMCAdaptor, targetacceptancerate = 0.8,
-    Metric = DiagEuclideanMetric, jitter_rate = 3.0,
-    tempering_rate = 3.0, max_depth = 10, Δ_max = 1000,
-    n_leapfrog = 10, δ = 0.65, λ = 0.3, progress = false,
-    verbose = false)
+        strategy = GridTraining, dataset = [nothing],
+        init_params = nothing, draw_samples = 1000,
+        physdt = 1 / 20.0, l2std = [0.05],
+        phystd = [0.05], priorsNNw = (0.0, 2.0),
+        param = [], nchains = 1, autodiff = false,
+        Kernel = HMC,
+        Adaptorkwargs = (Adaptor = StanHMCAdaptor,
+            Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
+        Integratorkwargs = (Integrator = Leapfrog,),
+        MCMCkwargs = (n_leapfrog = 30,),
+        progress = false, verbose = false)
 
     # NN parameter prior mean and variance(PriorsNN must be a tuple)
     if isinplace(prob)
@@ -676,7 +487,7 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain;
     strategy = strategy == GridTraining ? strategy(physdt) : strategy
 
     if dataset != [nothing] &&
-       (length(dataset) < 2 || !(typeof(dataset) <: Vector{<:Vector{<:AbstractFloat}}))
+       (length(dataset) < 2 || !(dataset isa Vector{<:Vector{<:AbstractFloat}}))
         throw(error("Invalid dataset. dataset would be timeseries (x̂,t) where type: Vector{Vector{AbstractFloat}"))
     end
 
@@ -713,7 +524,7 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain;
     ninv = length(param)
     priors = [
         MvNormal(priorsNNw[1] * ones(nparameters),
-            LinearAlgebra.Diagonal(map(abs2, priorsNNw[2] .* ones(nparameters)))),
+            LinearAlgebra.Diagonal(abs2.(priorsNNw[2] .* ones(nparameters)))),
     ]
 
     # append Ode params to all paramvector
@@ -739,15 +550,17 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain;
         end
     end
 
+    @info("Current Physics Log-likelihood : ", physloglikelihood(ℓπ, initial_θ))
+    @info("Current Prior Log-likelihood : ", priorweights(ℓπ, initial_θ))
+    @info("Current MSE against dataset Log-likelihood : ", L2LossData(ℓπ, initial_θ))
+
+    Adaptor, Metric, targetacceptancerate = Adaptorkwargs[:Adaptor],
+    Adaptorkwargs[:Metric], Adaptorkwargs[:targetacceptancerate]
+
     # Define Hamiltonian system (nparameters ~ dimensionality of the sampling space)
     metric = Metric(nparameters)
     hamiltonian = Hamiltonian(metric, ℓπ, ForwardDiff)
 
-    println("physics Logpdf is : ", physloglikelihood(ℓπ, initial_θ))
-    println("prior Logpdf is : ", priorweights(ℓπ, initial_θ))
-    println("L2lossData Logpdf is : ", L2LossData(ℓπ, initial_θ))
-    println("L2loss2 Logpdf is : ", L2loss2(ℓπ, initial_θ))
-
     # parallel sampling option
     if nchains != 1
         # Cache to store the chains
@@ -760,12 +573,12 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain;
             initial_θ = vcat(randn(nparameters - ninv),
                 initial_θ[(nparameters - ninv + 1):end])
             initial_ϵ = find_good_stepsize(hamiltonian, initial_θ)
-            integrator = integratorchoice(Integrator, initial_ϵ, jitter_rate,
-                tempering_rate)
+            integrator = integratorchoice(Integratorkwargs, initial_ϵ)
             adaptor = adaptorchoice(Adaptor, MassMatrixAdaptor(metric),
                 StepSizeAdaptor(targetacceptancerate, integrator))
-            Kernel = AdvancedHMC.make_kernel(kernelchoice(Kernel, max_depth, Δ_max,
-                    n_leapfrog, δ, λ), integrator)
+
+            MCMC_alg = kernelchoice(Kernel, MCMCkwargs)
+            Kernel = AdvancedHMC.make_kernel(MCMC_alg, integrator)
             samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples, adaptor;
                 progress = progress, verbose = verbose)
 
@@ -778,14 +591,21 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain;
         return chains, samplesc, statsc
     else
         initial_ϵ = find_good_stepsize(hamiltonian, initial_θ)
-        integrator = integratorchoice(Integrator, initial_ϵ, jitter_rate, tempering_rate)
+        integrator = integratorchoice(Integratorkwargs, initial_ϵ)
         adaptor = adaptorchoice(Adaptor, MassMatrixAdaptor(metric),
             StepSizeAdaptor(targetacceptancerate, integrator))
-        Kernel = AdvancedHMC.make_kernel(kernelchoice(Kernel, max_depth, Δ_max, n_leapfrog,
-                δ, λ), integrator)
+
+        MCMC_alg = kernelchoice(Kernel, MCMCkwargs)
+        Kernel = AdvancedHMC.make_kernel(MCMC_alg, integrator)
         samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples,
             adaptor; progress = progress, verbose = verbose)
 
+        @info("Sampling Complete.")
+        @info("Current Physics Log-likelihood : ", physloglikelihood(ℓπ, samples[end]))
+        @info("Current Prior Log-likelihood : ", priorweights(ℓπ, samples[end]))
+        @info("Current MSE against dataset Log-likelihood : ",
+            L2LossData(ℓπ, samples[end]))
+
         # return a chain(basic chain),samples and stats
         matrix_samples = hcat(samples...)
         mcmc_chain = MCMCChains.Chains(matrix_samples')
diff --git a/test/BPINN_Tests.jl b/test/BPINN_Tests.jl
index fa2f04073e..cb0303daf0 100644
--- a/test/BPINN_Tests.jl
+++ b/test/BPINN_Tests.jl
@@ -9,18 +9,6 @@ using NeuralPDE, MonteCarloMeasurements
 # on latest Julia version it performs much better for below tests
 Random.seed!(100)
 
-# for sampled params->lux ComponentArray
-function vector_to_parameters(ps_new::AbstractVector, ps::NamedTuple)
-    @assert length(ps_new) == Lux.parameterlength(ps)
-    i = 1
-    function get_ps(x)
-        z = reshape(view(ps_new, i:(i + length(x) - 1)), size(x))
-        i += length(x)
-        return z
-    end
-    return Functors.fmap(get_ps, ps)
-end
-
 ## PROBLEM-1 (WITHOUT PARAMETER ESTIMATION)
 linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π)
 linear = (u, p, t) -> cos(2 * π * t)
@@ -49,12 +37,10 @@ init1, re1 = destructure(chainflux)
 θinit, st = Lux.setup(Random.default_rng(), chainlux)
 
 fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chainflux,
-    draw_samples = 2500,
-    n_leapfrog = 30)
+    draw_samples = 2500)
 
 fh_mcmc_chain2, fhsamples2, fhstats2 = ahmc_bayesian_pinn_ode(prob, chainlux,
-    draw_samples = 2500,
-    n_leapfrog = 30)
+    draw_samples = 2500)
 
 # can change training strategies by adding this to call (Quadratuer and GridTraining show good results but stochastics sampling techniques perform bad)
 # strategy = QuadratureTraining(; quadrature_alg = QuadGKJL(),
@@ -62,12 +48,10 @@ fh_mcmc_chain2, fhsamples2, fhstats2 = ahmc_bayesian_pinn_ode(prob, chainlux,
 #     abstol = 1e-3, maxiters = 1000,
 #     batch = 0)
 
-alg = NeuralPDE.BNNODE(chainflux, draw_samples = 2500,
-    n_leapfrog = 30)
+alg = NeuralPDE.BNNODE(chainflux, draw_samples = 2500)
 sol1flux = solve(prob, alg)
 
-alg = NeuralPDE.BNNODE(chainlux, draw_samples = 2500,
-    n_leapfrog = 30)
+alg = NeuralPDE.BNNODE(chainlux, draw_samples = 2500)
 sol1lux = solve(prob, alg)
 
 # testing points
@@ -109,9 +93,9 @@ u = sol1.u
 time = sol1.t
 
 # BPINN AND TRAINING DATASET CREATION(dataset must be defined only inside problem timespan!)
-ta = range(tspan[1], tspan[2], length = 25)
+ta = range(tspan[1], tspan[2], length = 100)
 u = [linear_analytic(u0, p, ti) for ti in ta]
-x̂ = collect(Float64, Array(u) .+ (0.2 .* Array(u) .* randn(size(u))))
+x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
 time = vec(collect(Float64, ta))
 dataset = [x̂, time]
 physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
@@ -123,10 +107,6 @@ x̂1 = collect(Float64, Array(u1) + 0.2 * randn(size(u1)))
 time1 = vec(collect(Float64, ta0))
 physsol1_1 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
 
-using Plots, StatsPlots
-# plot(dataset[2], calderivatives(dataset)')
-yu = collect(prob.tspan[1]:(1 / 50.0):prob.tspan[2])
-plot(yu, [linear_analytic(u0, p, t) for t in yu])
 chainflux1 = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64
 chainlux1 = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1))
 init1, re1 = destructure(chainflux1)
@@ -135,88 +115,37 @@ init1, re1 = destructure(chainflux1)
 fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chainflux1,
     dataset = dataset,
     draw_samples = 2500,
-    physdt = 1 / 50.0f0,
+    physdt = 1 / 50.0,
     priorsNNw = (0.0,
         3.0),
     param = [
         LogNormal(9,
             0.5),
-    ],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30)
+    ])
 
 fh_mcmc_chain2, fhsamples2, fhstats2 = ahmc_bayesian_pinn_ode(prob, chainlux1,
     dataset = dataset,
     draw_samples = 2500,
-    physdt = 1 / 50.0f0,
+    physdt = 1 / 50.0,
     priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30)
+    param = [LogNormal(9, 0.5)])
 
 alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 10.0),
-    l2std = [0.005], phystd = [0.01],
-    param = [Normal(11, 6)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30)
-# original paper (pure data 0 1)
-sol1flux = solve(prob, alg)
-sol1flux.estimated_ode_params
-# pure data method 1 1
-sol2flux = solve(prob, alg)
-sol2flux.estimated_ode_params
-# pure data method 1 0
-sol3flux = solve(prob, alg)
-sol3flux.estimated_ode_params
-# deri collocation
-sol4flux = solve(prob, alg)
-sol4flux.estimated_ode_params
-# collocation
-sol5flux = solve(prob, alg)
-sol5flux.estimated_ode_params
-# collocation + L2Data loss(at 9,0.5 1,2 gives same)
-sol6flux = solve(prob, alg)
-sol6flux.estimated_ode_params
-# 2500 iters
-sol7flux = solve(prob, alg)
-sol7flux.estimated_ode_params
-
-plotly()
-plot!(yu, sol1flux.ensemblesol[1])
-plot!(yu, sol2flux.ensemblesol[1])
-plot!(yu, sol3flux.ensemblesol[1])
-plot!(yu, sol4flux.ensemblesol[1])
-plot!(yu, sol5flux.ensemblesol[1])
-plot!(yu, sol6flux.ensemblesol[1])
-
-plot!(dataset[2], dataset[1])
-
-# plot!(sol4flux.ensemblesol[1])
-# plot!(sol5flux.ensemblesol[1])
-
-sol2flux.estimated_ode_params
-
-sol1flux.estimated_ode_params
-
-sol3flux.estimated_ode_params
-
-sol4flux.estimated_ode_params
+    draw_samples = 2500, physdt = 1 / 50.0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)])
 
-sol5flux.estimated_ode_params
+sol2flux = solve(prob, alg)
 
 alg = NeuralPDE.BNNODE(chainlux1, dataset = dataset,
     draw_samples = 2500,
-    physdt = 1 / 50.0f0,
+    physdt = 1 / 50.0,
     priorsNNw = (0.0,
         3.0),
     param = [
         LogNormal(9,
             0.5),
-    ],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30)
+    ])
 
 sol2lux = solve(prob, alg)
 
@@ -238,16 +167,16 @@ meanscurve2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
 @test mean(abs.(physsol1 .- meanscurve2)) < 0.15
 
 # ESTIMATED ODE PARAMETERS (NN1 AND NN2)
-@test abs(p - mean([fhsamples2[i][23] for i in 2000:2500])) < abs(0.25 * p)
-@test abs(p - mean([fhsamples1[i][23] for i in 2000:2500])) < abs(0.25 * p)
+@test abs(p - mean([fhsamples2[i][23] for i in 2000:2500])) < abs(0.35 * p)
+@test abs(p - mean([fhsamples1[i][23] for i in 2000:2500])) < abs(0.35 * p)
 
 #-------------------------- solve() call  
 @test mean(abs.(physsol1_1 .- sol2flux.ensemblesol[1])) < 8e-2
 @test mean(abs.(physsol1_1 .- sol2lux.ensemblesol[1])) < 8e-2
 
 # ESTIMATED ODE PARAMETERS (NN1 AND NN2)
-@test abs(p - sol1flux.estimated_ode_params[1]) < abs(0.15 * p)
-@test abs(p - sol2lux.estimated_ode_params[1]) < abs(0.15 * p)
+@test abs(p - sol2flux.estimated_de_params[1]) < abs(0.15 * p)
+@test abs(p - sol2lux.estimated_de_params[1]) < abs(0.15 * p)
 
 ## PROBLEM-2
 linear = (u, p, t) -> u / p + exp(t / p) * cos(t)
@@ -277,37 +206,6 @@ chainlux12 = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh), Lux.Dense(6
 init1, re1 = destructure(chainflux12)
 θinit, st = Lux.setup(Random.default_rng(), chainlux12)
 
-using Flux
-using Random
-
-function derivatives(chainflux, dataset)
-    loss(x, y) = Flux.mse(chainflux(x), y)
-    optimizer = Flux.Optimise.ADAM(0.01)
-    epochs = 2500
-    for epoch in 1:epochs
-        Flux.train!(loss, Flux.params(chainflux), [(dataset[2]', dataset[1]')], optimizer)
-    end
-    getgradient(chainflux, dataset)
-end
-
-function getgradient(chainflux, dataset)
-    return (chainflux(dataset[end]' .+ sqrt(eps(eltype(Float64)))) .-
-            chainflux(dataset[end]')) ./
-           sqrt(eps(eltype(dataset[end][1])))
-end
-
-ans = derivatives(chainflux12, dataset)
-
-init3, re = destructure(chainflux12)
-init2 == init1
-init3 == init2
-plot!(dataset[end], ans')
-plot!(dataset[end], chainflux12(dataset[end]')')
-
-ars = getgradient(chainflux12, dataset)
-
-plot!(dataset[end], ars')
-
 fh_mcmc_chainflux12, fhsamplesflux12, fhstatsflux12 = ahmc_bayesian_pinn_ode(prob,
     chainflux12,
     draw_samples = 1500,
@@ -315,8 +213,7 @@ fh_mcmc_chainflux12, fhsamplesflux12, fhstatsflux12 = ahmc_bayesian_pinn_ode(pro
     phystd = [
         0.03],
     priorsNNw = (0.0,
-        10.0),
-    n_leapfrog = 30)
+        10.0))
 
 fh_mcmc_chainflux22, fhsamplesflux22, fhstatsflux22 = ahmc_bayesian_pinn_ode(prob,
     chainflux12,
@@ -331,16 +228,14 @@ fh_mcmc_chainflux22, fhsamplesflux22, fhstatsflux22 = ahmc_bayesian_pinn_ode(pro
     param = [
         Normal(-7,
             4),
-    ],
-    n_leapfrog = 30)
+    ])
 
 fh_mcmc_chainlux12, fhsampleslux12, fhstatslux12 = ahmc_bayesian_pinn_ode(prob, chainlux12,
     draw_samples = 1500,
     l2std = [0.03],
     phystd = [0.03],
     priorsNNw = (0.0,
-        10.0),
-    n_leapfrog = 30)
+        10.0))
 
 fh_mcmc_chainlux22, fhsampleslux22, fhstatslux22 = ahmc_bayesian_pinn_ode(prob, chainlux12,
     dataset = dataset,
@@ -352,13 +247,12 @@ fh_mcmc_chainlux22, fhsampleslux22, fhstatslux22 = ahmc_bayesian_pinn_ode(prob,
     param = [
         Normal(-7,
             4),
-    ],
-    n_leapfrog = 30)
+    ])
 
-alg1 = NeuralPDE.BNNODE(chainflux12,
+alg = NeuralPDE.BNNODE(chainflux12,
     dataset = dataset,
-    draw_samples = 500,
-    l2std = [0.01],
+    draw_samples = 1500,
+    l2std = [0.03],
     phystd = [
         0.03,
     ],
@@ -367,51 +261,10 @@ alg1 = NeuralPDE.BNNODE(chainflux12,
     param = [
         Normal(-7,
             4),
-    ],
-    n_leapfrog = 30, progress = true)
+    ])
 
-# original paper (pure data 0 1)
-sol1flux_pestim = solve(prob, alg1)
-sol1flux_pestim.estimated_ode_params
-# pure data method 1 1
-sol2flux_pestim = solve(prob, alg1)
-sol2flux_pestim.estimated_ode_params
-# pure data method 1 0
-sol3flux_pestim = solve(prob, alg1)
-sol3flux_pestim.estimated_ode_params
-# deri collocation
-sol4flux_pestim = solve(prob, alg1)
-sol4flux_pestim.estimated_ode_params
-# collocation
-sol5flux_pestim = solve(prob, alg1)
-sol5flux_pestim.estimated_ode_params
-# collocation + L2Data loss(at 9,0.5 1,2 gives same)
-sol6flux_pestim = solve(prob, alg1)
-sol6flux_pestim.estimated_ode_params
+sol3flux_pestim = solve(prob, alg)
 
-using Plots, StatsPlots
-ars = collect(prob.tspan[1]:(1 / 50.0):prob.tspan[2])
-plot(time, u)
-plot!(ars, sol1flux_pestim.ensemblesol[1])
-plot!(ars, sol2flux_pestim.ensemblesol[1])
-plot!(ars, sol3flux_pestim.ensemblesol[1])
-plot!(ars, sol4flux_pestim.ensemblesol[1])
-plot!(ars, sol5flux_pestim.ensemblesol[1])
-plot!(ars, sol6flux_pestim.ensemblesol[1])
-
-sol3flux_pestim.estimated_ode_params
-
-sol4flux_pestim.estimated_ode_params
-
-sol5flux_pestim.estimated_ode_params
-
-sol6flux_pestim.estimated_ode_params
-
-ars = collect(prob.tspan[1]:(1 / 50.0):prob.tspan[2])
-
-init, re1 = destructure(chainflux12)
-init
-init1
 alg = NeuralPDE.BNNODE(chainlux12,
     dataset = dataset,
     draw_samples = 1500,
@@ -422,8 +275,7 @@ alg = NeuralPDE.BNNODE(chainlux12,
     param = [
         Normal(-7,
             4),
-    ],
-    n_leapfrog = 30)
+    ])
 
 sol3lux_pestim = solve(prob, alg)
 
@@ -474,3881 +326,11 @@ param1 = mean(i[62] for i in fhsampleslux22[1000:1500])
 # (flux chain)
 @test mean(abs.(physsol2 .- sol3flux_pestim.ensemblesol[1])) < 0.15
 # estimated parameters(flux chain)
-param1 = sol3flux_pestim.estimated_ode_params[1]
+param1 = sol3flux_pestim.estimated_de_params[1]
 @test abs(param1 - p) < abs(0.45 * p)
 
 # (lux chain)
 @test mean(abs.(physsol2 .- sol3lux_pestim.ensemblesol[1])) < 0.15
 # estimated parameters(lux chain)
-param1 = sol3lux_pestim.estimated_ode_params[1]
-@test abs(param1 - p) < abs(0.45 * p)
-
-using Plots, StatsPlots
-using NoiseRobustDifferentiation, Weave, DataInterpolations
-
-# # ----------------------------------------------------------
-# # physdt=1/20, Full likelihood
-# # 25 points 
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
-#     draw_samples = 1500, physdt = 1 / 50.0f0, phystd = [0.01],
-#     l2std = [0.01],
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux1 = solve(prob, alg)
-# sol2flux1.estimated_ode_params[1] #6.41722 Particles{Float64, 1}, 6.02404 Particles{Float64, 1}
-# sol2flux2 = solve(prob, alg)
-# sol2flux2.estimated_ode_params[1] #6.42782 Particles{Float64, 1}, 6.07509 Particles{Float64, 1}
-# sol2flux3 = solve(prob, alg)
-# sol2flux3.estimated_ode_params[1] #6.42782 Particles{Float64, 1}, 6.00825 Particles{Float64, 1}
-
-# # 50 points 
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux11 = solve(prob, alg)
-# sol2flux11.estimated_ode_params[1] #5.71268 Particles{Float64, 1}, 6.07242 Particles{Float64, 1}
-# sol2flux22 = solve(prob, alg)
-# sol2flux22.estimated_ode_params[1] #5.74599 Particles{Float64, 1}, 6.04837 Particles{Float64, 1}
-# sol2flux33 = solve(prob, alg)
-# sol2flux33.estimated_ode_params[1] #5.74599 Particles{Float64, 1}, 6.02838 Particles{Float64, 1}
-
-# # 100 points 
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux111 = solve(prob, alg)
-# sol2flux111.estimated_ode_params[1] #6.59097 Particles{Float64, 1}, 5.89384 Particles{Float64, 1}
-# sol2flux222 = solve(prob, alg)
-# sol2flux222.estimated_ode_params[1] #6.62813 Particles{Float64, 1}, 5.88216 Particles{Float64, 1}
-# sol2flux333 = solve(prob, alg)
-# sol2flux333.estimated_ode_params[1] #6.62813 Particles{Float64, 1}, 5.85327 Particles{Float64, 1}
-
-# # ----------------------------------------------------------
-# # physdt=1/20, full likelihood cdm
-# # 25 points 
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux1_cdm = solve(prob, alg)
-# sol2flux1_cdm.estimated_ode_params[1]# 6.50506 Particles{Float64, 1} ,6.38963 Particles{Float64, 1}
-# sol2flux2_cdm = solve(prob, alg)
-# sol2flux2_cdm.estimated_ode_params[1] #6.50032 Particles{Float64, 1} ,6.39817 Particles{Float64, 1}
-# sol2flux3_cdm = solve(prob, alg)
-# sol2flux3_cdm.estimated_ode_params[1] #6.50032 Particles{Float64, 1} ,6.36296 Particles{Float64, 1}
-
-# # 50 points 
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux11_cdm = solve(prob, alg)
-# sol2flux11_cdm.estimated_ode_params[1] #6.52951 Particles{Float64, 1},5.15621 Particles{Float64, 1}
-# sol2flux22_cdm = solve(prob, alg)
-# sol2flux22_cdm.estimated_ode_params[1] #6.54988 Particles{Float64, 1},5.16363 Particles{Float64, 1}
-# sol2flux33_cdm = solve(prob, alg)
-# sol2flux33_cdm.estimated_ode_params[1] #6.54988 Particles{Float64, 1},5.15591 Particles{Float64, 1}
-
-# # 100 points 
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux111_cdm = solve(prob, alg)
-# sol2flux111_cdm.estimated_ode_params[1] #6.74338 Particles{Float64, 1}, 9.72422 Particles{Float64, 1}
-# sol2flux222_cdm = solve(prob, alg)
-# sol2flux222_cdm.estimated_ode_params[1] #6.72642 Particles{Float64, 1}, 9.71991 Particles{Float64, 1}
-# sol2flux333_cdm = solve(prob, alg)
-# sol2flux333_cdm.estimated_ode_params[1] #6.72642 Particles{Float64, 1}, 9.75045 Particles{Float64, 1}
-
-# --------------------------------------------------------------------------------------
-#                              NEW SERIES OF TESTS (IN ORDER OF EXECUTION)
-#  -------------------------------------------------------------------------------------
-# original paper implementaion
-# 25 points
-ta = range(tspan[1], tspan[2], length = 25)
-u = [linear_analytic(u0, p, ti) for ti in ta]
-x̂ = collect(Float64, u .+ 0.05 * randn(size(u)))
-time = vec(collect(Float64, ta))
-dataset1 = [x̂, time]
-physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-# scatter!(time, u)
-# dataset
-# scatter!(dataset1[2], dataset1[1])
-# plot(time, physsol1)
-
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux1_normal = solve(prob, alg)
-sol2flux1_normal.estimated_ode_params[1]  #7.70593 Particles{Float64, 1}, 6.36096 Particles{Float64, 1} | 6.45865 Particles{Float64, 1}
-sol2flux2_normal = solve(prob, alg)
-sol2flux2_normal.estimated_ode_params[1] #6.66347 Particles{Float64, 1}, 6.36974 Particles{Float64, 1} | 6.45865 Particles{Float64, 1}
-sol2flux3_normal = solve(prob, alg)
-sol2flux3_normal.estimated_ode_params[1] #6.84827 Particles{Float64, 1}, 6.29555 Particles{Float64, 1} | 6.39947 Particles{Float64, 1}
-
-# 50 points
-ta = range(tspan[1], tspan[2], length = 50)
-u = [linear_analytic(u0, p, ti) for ti in ta]
-x̂ = collect(Float64, Array(u) + 0.05 * randn(size(u)))
-time = vec(collect(Float64, ta))
-dataset2 = [x̂, time]
-physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux11_normal = solve(prob, alg)
-sol2flux11_normal.estimated_ode_params[1] #7.83577 Particles{Float64, 1},6.24652 Particles{Float64, 1} | 6.34495 Particles{Float64, 1}
-sol2flux22_normal = solve(prob, alg)
-sol2flux22_normal.estimated_ode_params[1] #6.49477 Particles{Float64, 1},6.2118 Particles{Float64, 1} | 6.32476 Particles{Float64, 1}
-sol2flux33_normal = solve(prob, alg)
-sol2flux33_normal.estimated_ode_params[1] #6.47421 Particles{Float64, 1},6.33687 Particles{Float64, 1} | 6.2448 Particles{Float64, 1}
-
-# 100 points
-ta = range(tspan[1], tspan[2], length = 100)
-u = [linear_analytic(u0, p, ti) for ti in ta]
-x̂ = collect(Float64, Array(u) + 0.05 * randn(size(u)))
-time = vec(collect(Float64, ta))
-dataset3 = [x̂, time]
-physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux111_normal = solve(prob, alg)
-sol2flux111_normal.estimated_ode_params[1] #5.96604 Particles{Float64, 1},5.99588 Particles{Float64, 1} | 6.19805 Particles{Float64, 1}
-sol2flux222_normal = solve(prob, alg)
-sol2flux222_normal.estimated_ode_params[1] #6.05432 Particles{Float64, 1},6.0768 Particles{Float64, 1} | 6.22948 Particles{Float64, 1}
-sol2flux333_normal = solve(prob, alg)
-sol2flux333_normal.estimated_ode_params[1] #6.08856 Particles{Float64, 1},5.94819 Particles{Float64, 1} | 6.2551 Particles{Float64, 1}
-
-# LOTKA VOLTERRA CASE 
-function lotka_volterra(u, p, t)
-    # Model parameters.
-    α, β, γ, δ = p
-    # Current state.
-    x, y = u
-
-    # Evaluate differential equations.
-    dx = (α - β * y) * x # prey
-    dy = (δ * x - γ) * y # predator
-
-    return [dx, dy]
-end
-
-# initial-value problem.
-u01 = [1.0, 1.0]
-p1 = [1.5, 1.0, 3.0, 1.0]
-tspan1 = (0.0, 6.0)
-prob1 = ODEProblem(lotka_volterra, u01, tspan1, p1)
-
-# chainlux = Lux.Chain(Lux.Dense(1, 7, Lux.tanh), Lux.Dense(7, 7, Lux.tanh), Lux.Dense(7, 2))
-chainflux1 = Flux.Chain(Flux.Dense(1, 8, tanh), Flux.Dense(8, 8, tanh), Flux.Dense(8, 2))
-
-#testing timepoints must match keyword arg `saveat`` timepoints of solve() call
-t1 = collect(Float64, prob1.tspan[1]:(1 / 50.0):prob1.tspan[2])
-
-# --------------------------------------------------------------------------
-# original paper implementaion lotka volterra
-# 31 points  
-solution1 = solve(prob1, Tsit5(); saveat = 0.1)
-time1 = solution1.t
-physsol1_1 = solution1.u
-u1 = hcat(solution1.u...)
-x1 = u1[1, :] .+ 0.3 .* u1[1, :] .* randn(length(u1[1, :]))
-y1 = u1[2, :] .+ 0.3 .* u1[2, :] .* randn(length(u1[2, :]))
-dataset2_1 = [x1, y1, time1]
-plot(dataset2_1[end], dataset2_1[1])
-plot!(dataset2_1[end], dataset2_1[2])
-plot!(time1, u1[1, :])
-plot!(time1, u1[2, :])
-
-alg1 = NeuralPDE.BNNODE(chainflux1,
-    dataset = dataset2_1,
-    draw_samples = 1000,
-    physdt = 1 / 20.0,
-    l2std = [
-        0.2,
-        0.2,
-    ],
-    phystd = [
-        0.5,
-        0.5,
-    ],
-    priorsNNw = (0.0,
-        10.0),
-    param = [
-        Normal(4,
-            3),
-        Normal(-2,
-            4),
-        Normal(0,
-            5),
-        Normal(2.5,
-            2)],
-    n_leapfrog = 30, progress = true)
-
-# original paper (pure data 0 1)
-sol1flux1_lotka = solve(prob1, alg1)
-sol1flux1_lotka.estimated_ode_params
-# pure data method 1 1
-sol2flux1_lotka = solve(prob1, alg1)
-sol2flux1_lotka.estimated_ode_params
-# pure data method 1 0
-sol3flux1_lotka = solve(prob1, alg1)
-sol3flux1_lotka.estimated_ode_params
-# deri collocation
-sol4flux1_lotka = solve(prob1, alg1)
-sol4flux1_lotka.estimated_ode_params
-# collocation
-sol5flux1_lotka = solve(prob1, alg1)
-sol5flux1_lotka.estimated_ode_params
-# collocation + L2Data loss(at 9,0.5 1,2 gives same)
-sol6flux1_lotka = solve(prob1, alg1)
-sol6flux1_lotka.estimated_ode_params
-
-sol7flux1_lotka = solve(prob1, alg1)
-sol7flux1_lotka.estimated_ode_params
-
-using Plots, StatsPlots
-plot(dataset2_1[3], u1[1, :])
-plot!(dataset2_1[3], u1[2, :])
-plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol5flux1_normal.ensemblesol[2])
-plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]),
-    sol1flux1_normal.ensemblesol[1],
-    legend = :outerbottomleft)
-sol1flux2_normal = solve(prob1, alg1)
-sol1flux2_normal.estimated_ode_params  #|
-sol1flux3_normal = solve(prob1, alg1)
-sol1flux3_normal.estimated_ode_params  #|
-sol1flux4_normal = solve(prob1, alg1)
-sol1flux4_normal.estimated_ode_params
-
-plotly()
-plot!(title = "yuh")
-plot!(dataset2_1[3], dataset2_1[1])
-plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol1flux1_normal.ensemblesol[1])
-plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol1flux2_normal.ensemblesol[1])
-plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol1flux3_normal.ensemblesol[2])
-plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol1flux4_normal.ensemblesol[1])
-plot(time1, u1[1, :])
-plot!(time1, u1[2, :])
-
-ars = chainflux1(dataset2_1[end]')
-plot(ars[1, :])
-plot!(ars[2, :])
-
-function calculate_derivatives(dataset)
-    u = dataset[1]
-    u1 = dataset[2]
-    t = dataset[end]
-    # control points
-    n = Int(floor(length(t) / 10))
-    # spline for datasetvalues(solution) 
-    # interp = BSplineApprox(u, t, 4, 10, :Uniform, :Uniform)
-    interp = CubicSpline(u, t)
-    interp1 = CubicSpline(u1, t)
-    # derrivatives interpolation
-    dx = t[2] - t[1]
-    time = collect(t[1]:dx:t[end])
-    smoothu = [interp(i) for i in time]
-    smoothu1 = [interp1(i) for i in time]
-    # derivative of the spline (must match function derivative) 
-    û = tvdiff(smoothu, 20, 0.5, dx = dx, ε = 1)
-    û1 = tvdiff(smoothu1, 20, 0.5, dx = dx, ε = 1)
-    # tvdiff(smoothu, 100, 0.035, dx = dx, ε = 1)
-    # FDM
-    # û1 = diff(u) / dx
-    # dataset[1] and smoothu are almost equal(rounding errors)
-    return û, û1
-    # return 1
-end
-
-ar = calculate_derivatives(dataset2_1)
-plot(ar[1])
-plot!(ar[2])
-
-# 61 points
-solution1 = solve(prob1, Tsit5(); saveat = 0.1)
-time1 = solution1.t
-physsol1_1 = solution1.u
-u1 = hcat(solution1.u...)
-x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
-y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
-dataset2_2 = [x1, y1, time1]
-
-alg1 = NeuralPDE.BNNODE(chainlux,
-    dataset = dataset2_2,
-    draw_samples = 1000,
-    l2std = [
-        0.1,
-        0.1,
-    ],
-    phystd = [
-        0.1,
-        0.1,
-    ],
-    priorsNNw = (0.0,
-        5.0),
-    param = [
-        LogNormal(1.5,
-            0.5),
-        LogNormal(1.2,
-            0.5),
-        LogNormal(3.3,
-            1),
-        LogNormal(1.4,
-            1)],
-    n_leapfrog = 30, progress = true)
-
-sol1flux11_normal = solve(prob1, alg1)
-sol1flux11_normal.estimated_ode_params #|
-sol1flux22_normal = solve(prob1, alg1)
-sol1flux22_normal.estimated_ode_params #|
-sol1flux33_normal = solve(prob1, alg1)
-sol1flux33_normal.estimated_ode_params #|
-
-# 121 points
-solution1 = solve(prob1, Tsit5(); saveat = 0.05)
-time1 = solution1.t
-physsol1_1 = solution1.u
-u1 = hcat(solution1.u...)
-x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
-y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
-dataset2_3 = [x1, y1, time1]
-
-alg1 = NeuralPDE.BNNODE(chainlux,
-    dataset = dataset2_3,
-    draw_samples = 1000,
-    l2std = [
-        0.1,
-        0.1,
-    ],
-    phystd = [
-        0.1,
-        0.1,
-    ],
-    priorsNNw = (0.0,
-        5.0),
-    param = [
-        LogNormal(1.5,
-            0.5),
-        LogNormal(1.2,
-            0.5),
-        LogNormal(3.3,
-            1),
-        LogNormal(1.4,
-            1)],
-    n_leapfrog = 30, progress = true)
-
-sol1flux111_normal = solve(prob1, alg1)
-sol1flux111_normal.estimated_ode_params #|
-sol1flux222_normal = solve(prob1, alg1)
-sol1flux222_normal.estimated_ode_params #|
-sol1flux333_normal = solve(prob1, alg1)
-sol1flux333_normal.estimated_ode_params #| 
-
-# -------------------------------------------------------------------- 
-
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -640.4155412187399
-# Sampling 100%|███████████████████████████████| Time: 0:02:30
-
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -640.4155412187399
-# Sampling 100%|███████████████████████████████| Time: 0:01:54
-
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -640.4155412187399
-# Sampling 100%|███████████████████████████████| Time: 0:01:59
-
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -1198.9147562830894
-# Sampling 100%|███████████████████████████████| Time: 0:02:44
-
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -1198.9147562830894
-# Sampling 100%|███████████████████████████████| Time: 0:02:41
-
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -1198.9147562830894
-# Sampling 100%|███████████████████████████████| Time: 0:02:41
-
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -2473.741390504424
-# Sampling 100%|███████████████████████████████| Time: 0:03:52
-
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -2473.741390504424
-# Sampling 100%|███████████████████████████████| Time: 0:03:49
-
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -2473.741390504424
-# Sampling 100%|███████████████████████████████| Time: 0:03:50
-
-# # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
-# physics Logpdf is : -6.659143464386241e7
-# prior Logpdf is : -150.30074579848434
-# L2lossData Logpdf is : -6.03075717462954e6
-# Sampling 100%|███████████████████████████████| Time: 0:04:54
-
-# physics Logpdf is : -8.70012053004202e8
-# prior Logpdf is : -150.3750892952511
-# L2lossData Logpdf is : -6.967914805207133e6
-# Sampling 100%|███████████████████████████████| Time: 0:05:09
-
-# physics Logpdf is : -5.417241281343099e7
-# prior Logpdf is : -150.52079555737976
-# L2lossData Logpdf is : -4.195953436792884e6
-# Sampling 100%|███████████████████████████████| Time: 0:05:01
-
-# physics Logpdf is : -4.579552981943833e8
-# prior Logpdf is : -150.30491731974283
-# L2lossData Logpdf is : -8.595475827260146e6
-# Sampling 100%|███████████████████████████████| Time: 0:06:08
-
-# physics Logpdf is : -1.989281834955769e7
-# prior Logpdf is : -150.16009042727543
-# L2lossData Logpdf is : -1.121270659669029e7
-# Sampling 100%|███████████████████████████████| Time: 0:05:38
-
-# physics Logpdf is : -8.683829147264534e8
-# prior Logpdf is : -150.37824872259102
-# L2lossData Logpdf is : -1.0887662888035845e7
-# Sampling 100%|███████████████████████████████| Time: 0:05:50
-
-# physics Logpdf is : -3.1944760610332566e8
-# prior Logpdf is : -150.33610348737565
-# L2lossData Logpdf is : -1.215458786744478e7
-# Sampling 100%|███████████████████████████████| Time: 0:10:50
-
-# physics Logpdf is : -3.2884572300341567e6
-# prior Logpdf is : -150.21002268156343
-# L2lossData Logpdf is : -1.102536731511176e7
-# Sampling 100%|███████████████████████████████| Time: 0:09:53
-
-# physics Logpdf is : -5.31293521002414e8
-# prior Logpdf is : -150.20948536040126
-# L2lossData Logpdf is : -1.818717239584132e7
-# Sampling 100%|███████████████████████████████| Time: 0:08:53
-
-# ----------------------------------------------------------
-# Full likelihood no l2 only new L22(NN gradients)
-# 25 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux1_new = solve(prob, alg)
-sol2flux1_new.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} | 6.21662 Particles{Float64, 1}
-sol2flux2_new = solve(prob, alg)
-sol2flux2_new.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1} | 7.14238 Particles{Float64, 1}
-sol2flux3_new = solve(prob, alg)
-sol2flux3_new.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1} | 6.79159 Particles{Float64, 1}
-
-# 50 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux11_new = solve(prob, alg)
-sol2flux11_new.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1} | 5.33467 Particles{Float64, 1}
-sol2flux22_new = solve(prob, alg)
-sol2flux22_new.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1} | 6.52419 Particles{Float64, 1}
-sol2flux33_new = solve(prob, alg)
-sol2flux33_new.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} | 5.36921 Particles{Float64, 1}
-
-# 100 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux111_new = solve(prob, alg)
-sol2flux111_new.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1} | 6.45333 Particles{Float64, 1}
-sol2flux222_new = solve(prob, alg)
-sol2flux222_new.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1} | 4.64417 Particles{Float64, 1}
-sol2flux333_new = solve(prob, alg)
-sol2flux333_new.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1} | 5.88037 Particles{Float64, 1}
-# ---------------------------------------------------------------------------
-
-# ----------------------------------------------------------
-# Full likelihood  l2 + new L22(NN gradients)
-# 25 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux1_new_all = solve(prob, alg)
-sol2flux1_new_all.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} | 6.4358 Particles{Float64, 1}
-sol2flux2_new_all = solve(prob, alg)
-sol2flux2_new_all.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1} | 6.52449 Particles{Float64, 1}
-sol2flux3_new_all = solve(prob, alg)
-sol2flux3_new_all.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1} | 6.34188 Particles{Float64, 1}
-
-# 50 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux11_new_all = solve(prob, alg)
-sol2flux11_new_all.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1} | 6.37889 Particles{Float64, 1}
-sol2flux22_new_all = solve(prob, alg)
-sol2flux22_new_all.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1} | 6.34747 Particles{Float64, 1}
-sol2flux33_new_all = solve(prob, alg)
-sol2flux33_new_all.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} | 6.39699 Particles{Float64, 1}
-
-# 100 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux111_new_all = solve(prob, alg)
-sol2flux111_new_all.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1} | 6.24327 Particles{Float64, 1}
-sol2flux222_new_all = solve(prob, alg)
-sol2flux222_new_all.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1} | 6.23928 Particles{Float64, 1}
-sol2flux333_new_all = solve(prob, alg)
-sol2flux333_new_all.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1} | 6.2145 Particles{Float64, 1}
-
-# ---------------------------------------------------------------------------
-# Full likelihood  l2 + new L22(dataset gradients) lotka volterra
-# 36 points 
-alg1 = NeuralPDE.BNNODE(chainlux,
-    dataset = dataset2_1,
-    draw_samples = 1000,
-    l2std = [
-        0.01,
-        0.01,
-    ],
-    phystd = [
-        0.01,
-        0.01,
-    ],
-    priorsNNw = (0.0,
-        3.0),
-    param = [
-        LogNormal(1.5,
-            0.5),
-        LogNormal(1.2,
-            0.5),
-        LogNormal(3.3,
-            1),
-        LogNormal(1.4,
-            1)],
-    n_leapfrog = 30, progress = true)
-
-sol1flux1_new_all = solve(prob1, alg1)
-sol1flux1_new_all.estimated_ode_params[1]  #|
-sol1flux2_new_all = solve(prob1, alg1)
-sol1flux2_new_all.estimated_ode_params[1] #|
-sol1flux3_new_all = solve(prob1, alg1)
-sol1flux3_new_all.estimated_ode_params[1] #|
-
-# 61 points 
-alg1 = NeuralPDE.BNNODE(chainlux,
-    dataset = dataset2_2,
-    draw_samples = 1000,
-    l2std = [
-        0.01,
-        0.01,
-    ],
-    phystd = [
-        0.01,
-        0.01,
-    ],
-    priorsNNw = (0.0,
-        3.0),
-    param = [
-        LogNormal(1.5,
-            0.5),
-        LogNormal(1.2,
-            0.5),
-        LogNormal(3.3,
-            1),
-        LogNormal(1.4,
-            1)],
-    n_leapfrog = 30, progress = true)
-
-sol1flux11_new_all = solve(prob1, alg1)
-sol1flux11_new_all.estimated_ode_params[1] #|
-sol1flux22_new_all = solve(prob1, alg1)
-sol1flux22_new_all.estimated_ode_params[1] #|
-sol1flux33_new_all = solve(prob1, alg1)
-sol1flux33_new_all.estimated_ode_params[1] #|
-
-# 121 points 
-alg1 = NeuralPDE.BNNODE(chainlux,
-    dataset = dataset2_3,
-    draw_samples = 1000,
-    l2std = [
-        0.01,
-        0.01,
-    ],
-    phystd = [
-        0.01,
-        0.01,
-    ],
-    priorsNNw = (0.0,
-        3.0),
-    param = [
-        LogNormal(1.5,
-            0.5),
-        LogNormal(1.2,
-            0.5),
-        LogNormal(3.3,
-            1),
-        LogNormal(1.4,
-            1)],
-    n_leapfrog = 30, progress = true)
-
-sol1flux111_new_all = solve(prob1, alg1)
-sol1flux111_new_all.estimated_ode_params[1] #|
-sol1flux222_new_all = solve(prob1, alg1)
-sol1flux222_new_all.estimated_ode_params[1] #|
-sol1flux333_new_all = solve(prob1, alg1)
-sol1flux333_new_all.estimated_ode_params[1] #|
-# -------------------------------------------------------------------- 
-
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -640.4155412187399
-# L2loss2 Logpdf is : -757.9047847584478
-# Sampling 100%|███████████████████████████████| Time: 0:02:32
-
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -640.4155412187399
-# L2loss2 Logpdf is : -757.9047847584478
-# Sampling 100%|███████████████████████████████| Time: 0:02:19
-
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -640.4155412187399
-# L2loss2 Logpdf is : -757.9047847584478
-# Sampling 100%|███████████████████████████████| Time: 0:02:31
-
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -1198.9147562830894
-# L2loss2 Logpdf is : -1517.3653615845183
-# Sampling 100%|███████████████████████████████| Time: 0:03:45
-
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -1198.9147562830894
-# L2loss2 Logpdf is : -1517.3653615845183
-# Sampling 100%|███████████████████████████████| Time: 0:03:20
-
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -1198.9147562830894
-# L2loss2 Logpdf is : -1517.3653615845183
-# Sampling 100%|███████████████████████████████| Time: 0:03:20
-
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -2473.741390504424
-# L2loss2 Logpdf is : -3037.8868319811254
-# Sampling 100%|███████████████████████████████| Time: 0:04:57
-
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -2473.741390504424
-# L2loss2 Logpdf is : -3037.8868319811254
-# Sampling 100%|███████████████████████████████| Time: 0:05:26
-
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -2473.741390504424
-# L2loss2 Logpdf is : -3037.8868319811254
-# Sampling 100%|███████████████████████████████| Time: 0:05:01
-
-# ----------------------------------------------------------
-# Full likelihood  l2 + new L22(dataset gradients)
-# 25 points
-# 1*,2*,  
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux1_newdata_all = solve(prob, alg)
-sol2flux1_newdata_all.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} | 5.73072 Particles{Float64, 1}
-sol2flux2_newdata_all = solve(prob, alg)
-sol2flux2_newdata_all.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1} | 5.71597 Particles{Float64, 1}
-sol2flux3_newdata_all = solve(prob, alg)
-sol2flux3_newdata_all.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1} | 5.7313 Particles{Float64, 1}
-
-# 50 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux11_newdata_all = solve(prob, alg)
-sol2flux11_newdata_all.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1} | 6.07153 Particles{Float64, 1}
-sol2flux22_newdata_all = solve(prob, alg)
-sol2flux22_newdata_all.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1} | 6.06623 Particles{Float64, 1}
-sol2flux33_newdata_all = solve(prob, alg)
-sol2flux33_newdata_all.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} | 6.12748 Particles{Float64, 1}
-
-# 100 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux111_newdata_all = solve(prob, alg)
-sol2flux111_newdata_all.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1} | 6.26222 Particles{Float64, 1}
-sol2flux222_newdata_all = solve(prob, alg)
-sol2flux222_newdata_all.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1} | 5.86494 Particles{Float64, 1}
-sol2flux333_newdata_all = solve(prob, alg)
-sol2flux333_newdata_all.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1} |  
-
-# ---------------------------------------------------------------------------
-
-# LOTKA VOLTERRA CASE
-using Plots, StatsPlots
-function lotka_volterra(u, p, t)
-    # Model parameters.
-    α, β, γ, δ = p
-    # Current state.
-    x, y = u
-
-    # Evaluate differential equations.
-    dx = (α - β * y) * x # prey
-    dy = (δ * x - γ) * y # predator
-
-    return [dx, dy]
-end
-
-# initial-value problem.
-u01 = [1.0, 1.0]
-p1 = [1.5, 1.0, 3.0, 1.0]
-tspan1 = (0.0, 6.0)
-prob1 = ODEProblem(lotka_volterra, u01, tspan1, p1)
-
-chainlux = Lux.Chain(Lux.Dense(1, 6, Lux.tanh), Lux.Dense(6, 6, Lux.tanh), Lux.Dense(6, 2))
-
-#testing timepoints must match keyword arg `saveat`` timepoints of solve() call
-t1 = collect(Float64, prob.tspan[1]:(1 / 50.0):prob.tspan[2])
-
-# --------------------------------------------------------------------------
-# original paper implementaion
-# 25 points  
-solution1 = solve(prob1, Tsit5(); saveat = 0.2)
-time1 = solution1.t
-physsol1_1 = solution1.u
-u1 = hcat(solution1.u...)
-x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
-y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
-dataset2_1 = [x1, y1, time1]
-
-plot(time1, u1[1, :])
-plot!(time1, u1[2, :])
-scatter!(dataset2_1[3], dataset2_1[1])
-scatter!(dataset2_1[3], dataset2_1[2])
-
-alg1 = NeuralPDE.BNNODE(chainlux,
-    dataset = dataset2_1,
-    draw_samples = 1000,
-    l2std = [
-        0.01,
-        0.01,
-    ],
-    phystd = [
-        0.01,
-        0.01,
-    ],
-    priorsNNw = (0.0,
-        3.0),
-    param = [
-        LogNormal(1.5,
-            0.5),
-        LogNormal(1.2,
-            0.5),
-        LogNormal(3.3,
-            1),
-        LogNormal(1.4,
-            1)],
-    n_leapfrog = 30, progress = true)
-
-sol1flux1_normal = solve(prob1, alg1)
-sol1flux1_normal.estimated_ode_params[1]  #|
-sol1flux2_normal = solve(prob1, alg1)
-sol1flux2_normal.estimated_ode_params[1] #|
-sol1flux3_normal = solve(prob1, alg1)
-sol1flux3_normal.estimated_ode_params[1] #|
-
-# 50 points
-solution1 = solve(prob1, Tsit5(); saveat = 0.05)
-time1 = solution1.t
-physsol1_1 = solution1.u
-u1 = hcat(solution1.u...)
-x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
-y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
-dataset2_2 = [x1, y1, time1]
-
-alg1 = NeuralPDE.BNNODE(chainlux,
-    dataset = dataset2_2,
-    draw_samples = 1000,
-    l2std = [
-        0.01,
-        0.01,
-    ],
-    phystd = [
-        0.01,
-        0.01,
-    ],
-    priorsNNw = (0.0,
-        3.0),
-    param = [
-        LogNormal(1.5,
-            0.5),
-        LogNormal(1.2,
-            0.5),
-        LogNormal(3.3,
-            1),
-        LogNormal(1.4,
-            1)],
-    n_leapfrog = 30, progress = true)
-
-sol1flux11_normal = solve(prob1, alg1)
-sol1flux11_normal.estimated_ode_params[1] #|
-sol1flux22_normal = solve(prob1, alg1)
-sol1flux22_normal.estimated_ode_params[1] #|
-sol1flux33_normal = solve(prob1, alg1)
-sol1flux33_normal.estimated_ode_params[1] #|
-
-# 100 points
-solution = solve(prob1, Tsit5(); saveat = 0.05)
-time1 = solution1.t
-physsol1_1 = solution1.u
-u1 = hcat(solution1.u...)
-x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
-y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
-dataset2_3 = [x1, y1, time1]
-
-alg1 = NeuralPDE.BNNODE(chainlux,
-    dataset = dataset2_3,
-    draw_samples = 1000,
-    l2std = [
-        0.01,
-        0.01,
-    ],
-    phystd = [
-        0.01,
-        0.01,
-    ],
-    priorsNNw = (0.0,
-        3.0),
-    param = [
-        LogNormal(1.5,
-            0.5),
-        LogNormal(1.2,
-            0.5),
-        LogNormal(3.3,
-            1),
-        LogNormal(1.4,
-            1)],
-    n_leapfrog = 30, progress = true)
-
-sol1flux111_normal = solve(prob1, alg1)
-sol1flux111_normal.estimated_ode_params[1] #|
-sol1flux222_normal = solve(prob1, alg1)
-sol1flux222_normal.estimated_ode_params[1] #|
-sol1flux333_normal = solve(prob1, alg1)
-sol1flux333_normal.estimated_ode_params[1] #|
-
-# --------------------------------------------------------------------
-
-# ----------------------------------------------------------
-# Full likelihood no l2 only new L22(NN gradients)
-# 25 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux1_new = solve(prob, alg)
-sol2flux1_new.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} |
-sol2flux2_new = solve(prob, alg)
-sol2flux2_new.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1}   |
-sol2flux3_new = solve(prob, alg)
-sol2flux3_new.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1}  |
-
-# 50 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux11_new = solve(prob, alg)
-sol2flux11_new.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1}   |
-sol2flux22_new = solve(prob, alg)
-sol2flux22_new.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1}   |
-sol2flux33_new = solve(prob, alg)
-sol2flux33_new.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} |
-
-# 100 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux111_new = solve(prob, alg)
-sol2flux111_new.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1}   |
-sol2flux222_new = solve(prob, alg)
-sol2flux222_new.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1}     |
-sol2flux333_new = solve(prob, alg)
-sol2flux333_new.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1}   |
-# ---------------------------------------------------------------------------
-
-# ----------------------------------------------------------
-# Full likelihood  l2 + new L22(NN gradients)
-# 25 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux1_new_all = solve(prob, alg)
-sol2flux1_new_all.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1}  |
-sol2flux2_new_all = solve(prob, alg)
-sol2flux2_new_all.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1}    |
-sol2flux3_new_all = solve(prob, alg)
-sol2flux3_new_all.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1}   |
-
-# 50 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux11_new_all = solve(prob, alg)
-sol2flux11_new_all.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1}   |
-sol2flux22_new_all = solve(prob, alg)
-sol2flux22_new_all.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1}   |
-sol2flux33_new_all = solve(prob, alg)
-sol2flux33_new_all.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} |
-# 100 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux111_new_all = solve(prob, alg)
-sol2flux111_new_all.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1}  |
-sol2flux222_new_all = solve(prob, alg)
-sol2flux222_new_all.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1}    |
-sol2flux333_new_all = solve(prob, alg)
-sol2flux333_new_all.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1}  |
-
-# ---------------------------------------------------------------------------
-
-# ----------------------------------------------------------
-# Full likelihood  l2 + new L22(dataset gradients)
-# 25 points 
-# *1,*2 vs *2.5
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol1flux1_newdata_all = solve(prob, alg)
-sol1flux1_newdata_all.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} |
-sol1flux2_newdata_all = solve(prob, alg)
-sol1flux2_newdata_all.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1}   |
-sol1flux3_newdata_all = solve(prob, alg)
-sol1flux3_newdata_all.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1}  |
-
-# 50 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol1flux11_newdata_all = solve(prob, alg)
-sol1flux11_newdata_all.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1}    |
-sol1flux22_newdata_all = solve(prob, alg)
-sol1flux22_newdata_all.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1}    |
-sol1flux33_newdata_all = solve(prob, alg)
-sol1flux33_newdata_all.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1}   |
-
-# 100 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol1flux111_newdata_all = solve(prob, alg)
-sol1flux111_newdata_all.estimated_ode_params[1]  #|
-sol1flux222_newdata_all = solve(prob, alg)
-sol1flux222_newdata_all.estimated_ode_params[1]  #|
-sol1flux333_newdata_all = solve(prob, alg)
-sol1flux333_newdata_all.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1}   |
-
-# ------------------------------------------------------------------------------------------------------------------------------
-
-# sol2flux111.estimated_ode_params[1]
-# # mine *5
-# 7.03386Particles{Float64, 1}
-# # normal
-# 6.38951Particles{Float64, 1}
-# 6.67657Particles{Float64, 1}
-# # mine *10
-# 7.53672Particles{Float64, 1}
-# # mine *2
-# 6.29005Particles{Float64, 1}
-# 6.29844Particles{Float64, 1}
-
-# # new mine *2
-# 6.39008Particles{Float64, 1}
-# 6.22071Particles{Float64, 1}
-# 6.15611Particles{Float64, 1}
-
-# # new mine *2 tvdiff(smoothu, 20, 0.035, dx = dx, ε = 1e-2)
-# 6.25549Particles{Float64, 1}
-# ----------------------------------------------------------
-
-# ---------------------------------------------------
-
-function calculate_derivatives1(dataset)
-    x̂, time = dataset
-    num_points = length(x̂)
-    # Initialize an array to store the derivative values.
-    derivatives = similar(x̂)
-
-    for i in 2:(num_points - 1)
-        # Calculate the first-order derivative using central differences.
-        Δt_forward = time[i + 1] - time[i]
-        Δt_backward = time[i] - time[i - 1]
-
-        derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
-
-        derivatives[i] = derivative
-    end
-
-    # Derivatives at the endpoints can be calculated using forward or backward differences.
-    derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
-    derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
-    return derivatives
-end
-
-function calculate_derivatives2(dataset)
-    u = dataset[1]
-    t = dataset[2]
-    # control points
-    n = Int(floor(length(t) / 10))
-    # spline for datasetvalues(solution) 
-    # interp = BSplineApprox(u, t, 4, 10, :Uniform, :Uniform)
-    interp = CubicSpline(u, t)
-    # derrivatives interpolation
-    dx = t[2] - t[1]
-    time = collect(t[1]:dx:t[end])
-    smoothu = [interp(i) for i in time]
-    # derivative of the spline (must match function derivative) 
-    û = tvdiff(smoothu, 20, 0.03, dx = dx, ε = 1)
-    # tvdiff(smoothu, 100, 0.1, dx = dx)
-    # 
-    # 
-    # FDM
-    û1 = diff(u) / dx
-    # dataset[1] and smoothu are almost equal(rounding errors)
-    return û, time, smoothu, û1
-end
-
-# need to do this for all datasets
-c = [linear(prob.u0, p, t) for t in dataset3[2]] #ideal case
-b = calculate_derivatives1(dataset2) #central diffs
-# a = calculate_derivatives2(dataset) #tvdiff(smoothu, 100, 0.1, dx = dx)
-d = calculate_derivatives2(dataset1) #tvdiff(smoothu, 20, 0.035, dx = dx, ε = 1e-2)
-d = calculate_derivatives2(dataset2)
-d = calculate_derivatives2(dataset3)
-mean(abs2.(c .- b))
-mean(abs2.(c .- d[1]))
-loss(model, x, y) = mean(abs2.(model(x) .- y));
-scatter!(prob.u0 .+ (prob.tspan[2] .- dataset3[2]) .* chainflux1(dataset3[2]')')
-loss(chainflux1, dataset3[2]', dataset3[1]')
-# mean(abs2.(c[1:24] .- a[4]))
-plot(c, label = "ideal deriv")
-plot!(b, label = "Centraldiff deriv")
-# plot!(a[1], label = "tvdiff(0.1,def) derivatives") 
-plot!(d[1], label = "tvdiff(0.035,20) derivatives")
-plotly()
-
-# GridTraining , NoiseRobustDiff dataset[2][2]-dataset[2][1] l2std
-# 25 points 
-ta = range(tspan[1], tspan[2], length = 25)
-u = [linear_analytic(u0, p, ti) for ti in ta]
-x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-time = vec(collect(Float64, ta))
-dataset = [x̂, time]
-physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-time1 = collect(tspan[1]:(1 / 50.0):tspan[2])
-physsol = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
-plot(physsol, label = "solution")
-
-# plots from 32(deriv)
-# for d
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-    draw_samples = 2000, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-n2_sol2flux1 = solve(prob, alg)
-n2_sol2flux1.estimated_ode_params[1]
-# with extra likelihood 
-# 10.2011Particles{Float64, 1}
-
-# without extra likelihood 
-# 6.25791Particles{Float64, 1}
-# 6.29539Particles{Float64, 1}
-
-plot!(n2_sol2flux1.ensemblesol[1], label = "tvdiff(0.035,1) derivpar")
-plot(dataset[1])
-plot!(physsol1)
-# for a
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-    draw_samples = 2000, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-n2_sol2flux2 = solve(prob, alg)
-n2_sol2flux2.estimated_ode_params[1]
-# with extra likelihood
-# 8.73602Particles{Float64, 1}
-# without extra likelihood
-
-plot!(n2_sol2flux2.ensemblesol[1],
-    label = "tvdiff(0.1,def) derivatives",
-    legend = :outerbottomleft)
-
-# for b
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-    draw_samples = 2000, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-n2_sol2flux3 = solve(prob, alg)
-n2_sol2flux3.estimated_ode_params[1]
-plot!(n2_sol2flux3.ensemblesol[1], label = "Centraldiff deriv")
-
-# for c
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-    draw_samples = 2000, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-n2_sol2flux4 = solve(prob, alg)
-n2_sol2flux4.estimated_ode_params[1]
-plot!(n2_sol2flux4.ensemblesol[1], label = "ideal deriv")
-
-# 50 points 
-
-ta = range(tspan[1], tspan[2], length = 50)
-u = [linear_analytic(u0, p, ti) for ti in ta]
-x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-time = vec(collect(Float64, ta))
-dataset = [x̂, time]
-physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-n2_sol2flux11 = solve(prob, alg)
-n2_sol2flux11.estimated_ode_params[1]
-
-# 5.90049Particles{Float64, 1}
-# 100 points
-ta = range(tspan[1], tspan[2], length = 100)
-u = [linear_analytic(u0, p, ti) for ti in ta]
-x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-time = vec(collect(Float64, ta))
-dataset = [x̂, time]
-physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-n2_sol2flux111 = solve(prob, alg)
-n2_sol2flux111.estimated_ode_params[1]
-plot!(n2_sol2flux111.ensemblesol[1])
-8.88555Particles{Float64, 1}
-
-# 7.15353Particles{Float64, 1}
-# 6.21059 Particles{Float64, 1}
-# 6.31836Particles{Float64, 1}
-0.1 * p
-# ----------------------------------------------------------
-
-# Gives the linear interpolation value at t=3.5
-
-# # Problem 1 with param esimation
-# # dataset 0-1 2 percent noise
-# p = 6.283185307179586
-# # partial_logdensity
-# 6.3549Particles{Float64, 1}
-# # full log_density
-# 6.34667Particles{Float64, 1}
-
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2lux.estimated_ode_params[1]
-
-# # dataset 0-1 20 percent noise
-# # partial log_density
-# 6.30244Particles{Float64, 1}
-# # full log_density
-# 6.24637Particles{Float64, 1}
-
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# # dataset 0-2 20percent noise
-# # partial log_density
-# 6.24948Particles{Float64, 1}
-# # full log_density
-# 6.26095Particles{Float64, 1}
-
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# linear_analytic = (u0, p, t) -> u0 + sin(p * t) / (p)
-# linear = (u, p, t) -> cos(p * t)
-# tspan = (0.0, 2.0)
-
-# # dataset 0-1 2 percent noise
-# p = 6.283185307179586
-# # partial_logdensity
-# 6.3549Particles{Float64, 1}
-# # full log_density
-# 6.34667Particles{Float64, 1}
-
-# # dataset 0-1 20 percent noise
-# # partial log_density
-# 6.30244Particles{Float64, 1}
-# # full log_density
-# 6.24637Particles{Float64, 1}
-
-# # dataset 0-2 20percent noise
-# # partial log_density
-# 6.24948Particles{Float64, 1}
-# # full log_density
-# 6.26095Particles{Float64, 1}
-
-# # dataset 0-2 20percent noise 50 points(above all are 100 points)
-# # FuLL log_density
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# # partial log_density
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# # i kinda win on 25 points again
-# # dataset 0-2 20percent noise 25 points
-# # FuLL log_density
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# # partial log_density
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# # i win with 25 points
-# # dataset 0-1 20percent noise 25 points
-# # FuLL log_density
-# sol2flux.estimated_ode_params[1]
-# # new
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# # partial log_density
-# sol2flux.estimated_ode_params[1]
-# # New
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# # (9,2.5)(above are (9,0.5))
-# # FuLL log_density
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# # just prev was repeat(just change)
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# # partial log_density
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# # i lose on 0-1,50 points
-# # dataset 0-1 20percent noise 50 points
-# # FuLL log_density
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# # partial log_density
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# # (9,2.5) (above are (9,0.5))
-# # FuLL log_density
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# # partial log_density
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# # ----------------------------------------------------------
-# # Problem 1 with param estimation
-# # physdt=1/20, Full likelihood new 0.5*l2std
-# # 25 points
-# ta = range(tspan[1], tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n05_sol2flux1 = solve(prob, alg)
-# n05_sol2flux1.estimated_ode_params[1] #6.90953 Particles{Float64, 1}
-# n05_sol2flux2 = solve(prob, alg)
-# n05_sol2flux2.estimated_ode_params[1] #6.82374 Particles{Float64, 1}
-# n05_sol2flux3 = solve(prob, alg)
-# n05_sol2flux3.estimated_ode_params[1] #6.84465 Particles{Float64, 1}
-
-# using Plots, StatsPlots
-# plot(n05_sol2flux3.ensemblesol[1])
-# plot!(physsol1)
-# # 50 points
-# ta = range(tspan[1], tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n05_sol2flux11 = solve(prob, alg)
-# n05_sol2flux11.estimated_ode_params[1] #7.0262 Particles{Float64, 1}
-# n05_sol2flux22 = solve(prob, alg)
-# n05_sol2flux22.estimated_ode_params[1] #5.56438 Particles{Float64, 1}
-# n05_sol2flux33 = solve(prob, alg)
-# n05_sol2flux33.estimated_ode_params[1] #7.27189 Particles{Float64, 1}
-
-# # 100 points
-# ta = range(tspan[1], tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n05_sol2flux111 = solve(prob, alg)
-# n05_sol2flux111.estimated_ode_params[1] #6.90549 Particles{Float64, 1}
-# n05_sol2flux222 = solve(prob, alg)
-# n05_sol2flux222.estimated_ode_params[1] #5.42436 Particles{Float64, 1}
-# n05_sol2flux333 = solve(prob, alg)
-# n05_sol2flux333.estimated_ode_params[1] #6.05832 Particles{Float64, 1}
-
-# # ----------------------------------------------------------
-# # physdt=1/20, Full likelihood new 2*l2std
-# # 25 points
-# ta = range(tspan[1], tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n2_sol2flux1 = solve(prob, alg)
-# n2_sol2flux1.estimated_ode_params[1]#6.9087 Particles{Float64, 1}
-# n2_sol2flux2 = solve(prob, alg)
-# n2_sol2flux2.estimated_ode_params[1]#6.86507 Particles{Float64, 1}
-# n2_sol2flux3 = solve(prob, alg)
-# n2_sol2flux3.estimated_ode_params[1]#6.59206 Particles{Float64, 1}
-
-# # 50 points
-# ta = range(tspan[1], tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n2_sol2flux11 = solve(prob, alg)
-# n2_sol2flux11.estimated_ode_params[1]#7.3715 Particles{Float64, 1}
-# n2_sol2flux22 = solve(prob, alg)
-# n2_sol2flux22.estimated_ode_params[1]#9.84477 Particles{Float64, 1}
-# n2_sol2flux33 = solve(prob, alg)
-# n2_sol2flux33.estimated_ode_params[1]#6.87107 Particles{Float64, 1}
-
-# # 100 points
-# ta = range(tspan[1], tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n2_sol2flux111 = solve(prob, alg)
-# n2_sol2flux111.estimated_ode_params[1]#6.60739 Particles{Float64, 1}
-# n2_sol2flux222 = solve(prob, alg)
-# n2_sol2flux222.estimated_ode_params[1]#7.05923 Particles{Float64, 1}
-# n2_sol2flux333 = solve(prob, alg)
-# n2_sol2flux333.estimated_ode_params[1]#6.5017 Particles{Float64, 1}
-
-# # ----------------------------------------------------------
-
-# # ----------------------------------------------------------
-# # physdt=1/20, Full likelihood new all 2*l2std
-# # 25 points
-# ta = range(tspan[1], tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n2all5sol2flux1 = solve(prob, alg)
-# n2all5sol2flux1.estimated_ode_params[1]#11.3659 Particles{Float64, 1}
-# n2all5sol2flux2 = solve(prob, alg)
-# n2all5sol2flux2.estimated_ode_params[1]#6.65634 Particles{Float64, 1}
-# n2all5sol2flux3 = solve(prob, alg)
-# n2all5sol2flux3.estimated_ode_params[1]#6.61905 Particles{Float64, 1}
-
-# # 50 points
-# ta = range(tspan[1], tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n2all5sol2flux11 = solve(prob, alg)
-# n2all5sol2flux11.estimated_ode_params[1]#6.27555 Particles{Float64, 1}
-# n2all5sol2flux22 = solve(prob, alg)
-# n2all5sol2flux22.estimated_ode_params[1]#6.24352 Particles{Float64, 1}
-# n2all5sol2flux33 = solve(prob, alg)
-# n2all5sol2flux33.estimated_ode_params[1]#6.33723 Particles{Float64, 1}
-
-# # 100 points
-# ta = range(tspan[1], tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n2all5sol2flux111 = solve(prob, alg)
-# n2all5sol2flux111.estimated_ode_params[1] #5.95535 Particles{Float64, 1}
-# n2all5sol2flux222 = solve(prob, alg)
-# n2all5sol2flux222.estimated_ode_params[1] #5.98301 Particles{Float64, 1}
-# n2all5sol2flux333 = solve(prob, alg)
-# n2all5sol2flux333.estimated_ode_params[1] #5.9081 Particles{Float64, 1}
-
-# # ----------------------------------------------------------
-# # physdt=1/20, Full likelihood new all (l2+l22)
-# # 25 points
-# ta = range(tspan[1], tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# nall5sol2flux1 = solve(prob, alg)
-# nall5sol2flux1.estimated_ode_params[1]#6.54705 Particles{Float64, 1}
-# nall5sol2flux2 = solve(prob, alg)
-# nall5sol2flux2.estimated_ode_params[1]#6.6967 Particles{Float64, 1}
-# nall5sol2flux3 = solve(prob, alg)
-# nall5sol2flux3.estimated_ode_params[1]#6.47173 Particles{Float64, 1}
-
-# # 50 points
-# ta = range(tspan[1], tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# nall5sol2flux11 = solve(prob, alg)
-# nall5sol2flux11.estimated_ode_params[1]#6.2113 Particles{Float64, 1}
-# nall5sol2flux22 = solve(prob, alg)
-# nall5sol2flux22.estimated_ode_params[1]#6.10675 Particles{Float64, 1}
-# nall5sol2flux33 = solve(prob, alg)
-# nall5sol2flux33.estimated_ode_params[1]#6.11541 Particles{Float64, 1}
-
-# # 100 points
-# ta = range(tspan[1], tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# nall5sol2flux111 = solve(prob, alg)
-# nall5sol2flux111.estimated_ode_params[1]#6.35224 Particles{Float64, 1}
-# nall5sol2flux222 = solve(prob, alg)
-# nall5sol2flux222.estimated_ode_params[1]#6.40542 Particles{Float64, 1}
-# nall5sol2flux333 = solve(prob, alg)
-# nall5sol2flux333.estimated_ode_params[1]#6.44206 Particles{Float64, 1}
-
-# # ----------------------------------------------------------
-# # physdt=1/20, Full likelihood new 5* (new only l22 mod)
-# # 25 points
-# ta = range(tspan[1], tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n5sol2flux1 = solve(prob, alg)
-# n5sol2flux1.estimated_ode_params[1]#7.05077 Particles{Float64, 1}
-# n5sol2flux2 = solve(prob, alg)
-# n5sol2flux2.estimated_ode_params[1]#7.07303 Particles{Float64, 1}
-# n5sol2flux3 = solve(prob, alg)
-# n5sol2flux3.estimated_ode_params[1]#5.10622 Particles{Float64, 1}
-
-# # 50 points
-# ta = range(tspan[1], tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n5sol2flux11 = solve(prob, alg)
-# n5sol2flux11.estimated_ode_params[1]#7.39852 Particles{Float64, 1}
-# n5sol2flux22 = solve(prob, alg)
-# n5sol2flux22.estimated_ode_params[1]#7.30319 Particles{Float64, 1}
-# n5sol2flux33 = solve(prob, alg)
-# n5sol2flux33.estimated_ode_params[1]#6.73722 Particles{Float64, 1}
-
-# # 100 points
-# ta = range(tspan[1], tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n5sol2flux111 = solve(prob, alg)
-# n5sol2flux111.estimated_ode_params[1]#7.15996 Particles{Float64, 1}
-# n5sol2flux222 = solve(prob, alg)
-# n5sol2flux222.estimated_ode_params[1]#7.02949 Particles{Float64, 1}
-# n5sol2flux333 = solve(prob, alg)
-# n5sol2flux333.estimated_ode_params[1]#6.9393 Particles{Float64, 1}
-
-# # ----------------------------------------------------------
-# # physdt=1/20, Full likelihood new
-# # 25 points
-# ta = range(tspan[1], tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# nsol2flux1 = solve(prob, alg)
-# nsol2flux1.estimated_ode_params[1] #5.82707 Particles{Float64, 1}
-# nsol2flux2 = solve(prob, alg)
-# nsol2flux2.estimated_ode_params[1] #4.81534 Particles{Float64, 1}
-# nsol2flux3 = solve(prob, alg)
-# nsol2flux3.estimated_ode_params[1] #5.52965 Particles{Float64, 1}
-
-# # 50 points
-# ta = range(tspan[1], tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# nsol2flux11 = solve(prob, alg)
-# nsol2flux11.estimated_ode_params[1] #7.04027 Particles{Float64, 1}
-# nsol2flux22 = solve(prob, alg)
-# nsol2flux22.estimated_ode_params[1] #7.17588 Particles{Float64, 1}
-# nsol2flux33 = solve(prob, alg)
-# nsol2flux33.estimated_ode_params[1] #6.94495 Particles{Float64, 1}
-
-# # 100 points
-# ta = range(tspan[1], tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# nsol2flux111 = solve(prob, alg)
-# nsol2flux111.estimated_ode_params[1] #6.06608 Particles{Float64, 1}
-# nsol2flux222 = solve(prob, alg)
-# nsol2flux222.estimated_ode_params[1] #6.84726 Particles{Float64, 1}
-# nsol2flux333 = solve(prob, alg)
-# nsol2flux333.estimated_ode_params[1] #6.83463 Particles{Float64, 1}
-
-# # ----------------------------------------------------------
-
-# # ----------------------------------------------------------
-# # physdt=1/20, Full likelihood
-# # 25 points
-# ta = range(tspan[1], tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux1 = solve(prob, alg)
-# sol2flux1.estimated_ode_params[1] #6.71397 Particles{Float64, 1} 6.37604 Particles{Float64, 1}
-# sol2flux2 = solve(prob, alg)
-# sol2flux2.estimated_ode_params[1] #6.73509 Particles{Float64, 1} 6.21692 Particles{Float64, 1}
-# sol2flux3 = solve(prob, alg)
-# sol2flux3.estimated_ode_params[1] #6.65453 Particles{Float64, 1} 6.23153 Particles{Float64, 1}
-
-# # 50 points
-# ta = range(tspan[1], tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux11 = solve(prob, alg)
-# sol2flux11.estimated_ode_params[1] #6.23443 Particles{Float64, 1} 6.30635 Particles{Float64, 1}
-# sol2flux22 = solve(prob, alg)
-# sol2flux22.estimated_ode_params[1] #6.18879 Particles{Float64, 1} 6.30099 Particles{Float64, 1}
-# sol2flux33 = solve(prob, alg)
-# sol2flux33.estimated_ode_params[1] #6.22773 Particles{Float64, 1} 6.30671 Particles{Float64, 1}
-
-# # 100 points
-# ta = range(tspan[1], tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux111 = solve(prob, alg)
-# sol2flux111.estimated_ode_params[1] #6.15832 Particles{Float64, 1} 6.35453 Particles{Float64, 1}
-# sol2flux222 = solve(prob, alg)
-# sol2flux222.estimated_ode_params[1] #6.16968 Particles{Float64, 1}6.31125 Particles{Float64, 1}
-# sol2flux333 = solve(prob, alg)
-# sol2flux333.estimated_ode_params[1] #6.12466 Particles{Float64, 1} 6.26514 Particles{Float64, 1}
-
-# # ----------------------------------------------------------
-
-# # ----------------------------------------------------------
-# # physdt=1/20, partial likelihood
-# # 25 points
-# ta = range(tspan[1], tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux1_p = solve(prob, alg)
-# sol2flux1_p.estimated_ode_params[1] #5.74065 Particles{Float64, 1} #6.83683 Particles{Float64, 1}
-# sol2flux2_p = solve(prob, alg)
-# sol2flux2_p.estimated_ode_params[1] #9.82504 Particles{Float64, 1} #6.14568 Particles{Float64, 1}
-# sol2flux3_p = solve(prob, alg)
-# sol2flux3_p.estimated_ode_params[1] #5.75075 Particles{Float64, 1} #6.08579 Particles{Float64, 1}
-
-# # 50 points
-# ta = range(tspan[1], tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux11_p = solve(prob, alg)
-# sol2flux11_p.estimated_ode_params[1] #6.19414 Particles{Float64, 1} #6.04621 Particles{Float64, 1}
-# sol2flux22_p = solve(prob, alg)
-# sol2flux22_p.estimated_ode_params[1] #6.15227 Particles{Float64, 1} #6.29086 Particles{Float64, 1}
-# sol2flux33_p = solve(prob, alg)
-# sol2flux33_p.estimated_ode_params[1] #6.19048 Particles{Float64, 1} #6.12516 Particles{Float64, 1}
-
-# # 100 points
-# ta = range(tspan[1], tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux111_p = solve(prob, alg)
-# sol2flux111_p.estimated_ode_params[1] #6.51608 Particles{Float64, 1}# 6.42945Particles{Float64, 1}
-# sol2flux222_p = solve(prob, alg)
-# sol2flux222_p.estimated_ode_params[1] #6.4875 Particles{Float64, 1} # 6.44524Particles{Float64, 1}
-# sol2flux333_p = solve(prob, alg)
-# sol2flux333_p.estimated_ode_params[1] #6.51679 Particles{Float64, 1}# 6.43152Particles{Float64, 1}
-
-# # ---------------------------------------------------
-
-# # ----------------------------------------------------------
-# # physdt=1/20, Full likelihood, dataset(1.0-2.0)
-# # 25 points
-# ta = range(1.0, tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1flux1 = solve(prob, alg)
-# sol1flux1.estimated_ode_params[1] #6.35164 Particles{Float64, 1}
-# sol1flux2 = solve(prob, alg)
-# sol1flux2.estimated_ode_params[1] #6.30919 Particles{Float64, 1}
-# sol1flux3 = solve(prob, alg)
-# sol1flux3.estimated_ode_params[1] #6.33554 Particles{Float64, 1}
-
-# # 50 points
-# ta = range(1.0, tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1flux11 = solve(prob, alg)
-# sol1flux11.estimated_ode_params[1] #6.39769 Particles{Float64, 1}
-# sol1flux22 = solve(prob, alg)
-# sol1flux22.estimated_ode_params[1] #6.43924 Particles{Float64, 1}
-# sol1flux33 = solve(prob, alg)
-# sol1flux33.estimated_ode_params[1] #6.4697 Particles{Float64, 1}
-
-# # 100 points
-# ta = range(1.0, tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1flux111 = solve(prob, alg)
-# sol1flux111.estimated_ode_params[1] #6.27812 Particles{Float64, 1}
-# sol1flux222 = solve(prob, alg)
-# sol1flux222.estimated_ode_params[1] #6.19278 Particles{Float64, 1}
-# sol1flux333 = solve(prob, alg)
-# sol1flux333.estimated_ode_params[1] # 9.68244Particles{Float64, 1} (first try) # 6.23969 Particles{Float64, 1}(second try)
-
-# # ----------------------------------------------------------
-
-# # ----------------------------------------------------------
-# # physdt=1/20, partial likelihood, dataset(1.0-2.0)
-# # 25 points
-# ta = range(1.0, tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1flux1_p = solve(prob, alg)
-# sol1flux1_p.estimated_ode_params[1]#6.36269 Particles{Float64, 1}
-
-# sol1flux2_p = solve(prob, alg)
-# sol1flux2_p.estimated_ode_params[1]#6.34685 Particles{Float64, 1}
-
-# sol1flux3_p = solve(prob, alg)
-# sol1flux3_p.estimated_ode_params[1]#6.31421 Particles{Float64, 1}
-
-# # 50 points
-# ta = range(1.0, tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1flux11_p = solve(prob, alg)
-# sol1flux11_p.estimated_ode_params[1] #6.15725 Particles{Float64, 1}
-
-# sol1flux22_p = solve(prob, alg)
-# sol1flux22_p.estimated_ode_params[1] #6.18145 Particles{Float64, 1}
-
-# sol1flux33_p = solve(prob, alg)
-# sol1flux33_p.estimated_ode_params[1] #6.21905 Particles{Float64, 1}
-
-# # 100 points
-# ta = range(1.0, tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1flux111_p = solve(prob, alg)
-# sol1flux111_p.estimated_ode_params[1]#6.13481 Particles{Float64, 1}
-
-# sol1flux222_p = solve(prob, alg)
-# sol1flux222_p.estimated_ode_params[1]#9.68555 Particles{Float64, 1}
-
-# sol1flux333_p = solve(prob, alg)
-# sol1flux333_p.estimated_ode_params[1]#6.1477 Particles{Float64, 1}
-
-# # -----------------------------------------------------------
-
-# # ----------------------------------------------------------
-# # physdt=1/20, partial likelihood, dataset(1-2), again but different density
-# # 12 points
-# ta = range(1.0, tspan[2], length = 12)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol3flux1_p = solve(prob, alg)
-# sol3flux1_p.estimated_ode_params[1]#6.50048 Particles{Float64, 1}
-# sol3flux2_p = solve(prob, alg)
-# sol3flux2_p.estimated_ode_params[1]#6.57597 Particles{Float64, 1}
-# sol3flux3_p = solve(prob, alg)
-# sol3flux3_p.estimated_ode_params[1]#6.24487 Particles{Float64, 1}
-
-# # 25 points
-# ta = range(1.0, tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol3flux11_p = solve(prob, alg)
-# sol3flux11_p.estimated_ode_params[1]#6.53093 Particles{Float64, 1}
-
-# sol3flux22_p = solve(prob, alg)
-# sol3flux22_p.estimated_ode_params[1]#6.32744 Particles{Float64, 1}
-
-# sol3flux33_p = solve(prob, alg)
-# sol3flux33_p.estimated_ode_params[1]#6.49175 Particles{Float64, 1}
-
-# # 50 points
-# ta = range(1.0, tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol3flux111_p = solve(prob, alg)
-# sol3flux111_p.estimated_ode_params[1]#6.4455 Particles{Float64, 1}
-# sol3flux222_p = solve(prob, alg)
-# sol3flux222_p.estimated_ode_params[1]#6.40736 Particles{Float64, 1}
-# sol3flux333_p = solve(prob, alg)
-# sol3flux333_p.estimated_ode_params[1]#6.46214 Particles{Float64, 1}
-
-# # ---------------------------------------------------
-
-# # ----------------------------------------------------------
-# # physdt=1/20, partial likelihood, dataset(0-1)
-# # 25 points
-# ta = range(tspan[1], 1.0, length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol0flux1_p = solve(prob, alg)
-# sol0flux1_p.estimated_ode_params[1]#7.12625 Particles{Float64, 1}
-# sol0flux2_p = solve(prob, alg)
-# sol0flux2_p.estimated_ode_params[1]#8.40948 Particles{Float64, 1}
-# sol0flux3_p = solve(prob, alg)
-# sol0flux3_p.estimated_ode_params[1]#7.18768 Particles{Float64, 1}
-
-# # 50 points
-# ta = range(tspan[1], 1.0, length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol0flux11_p = solve(prob, alg)
-# sol0flux11_p.estimated_ode_params[1]#6.23707 Particles{Float64, 1}
-# sol0flux22_p = solve(prob, alg)
-# sol0flux22_p.estimated_ode_params[1]#6.09728 Particles{Float64, 1}
-# sol0flux33_p = solve(prob, alg)
-# sol0flux33_p.estimated_ode_params[1]#6.12971 Particles{Float64, 1}
-
-# # 100 points
-# ta = range(tspan[1], 1.0, length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol0flux111_p = solve(prob, alg)
-# sol0flux111_p.estimated_ode_params[1]#5.99039 Particles{Float64, 1}
-# sol0flux222_p = solve(prob, alg)
-# sol0flux222_p.estimated_ode_params[1]#5.89609 Particles{Float64, 1}
-# sol0flux333_p = solve(prob, alg)
-# sol0flux333_p.estimated_ode_params[1]#5.91923 Particles{Float64, 1}
-
-# # ---------------------------------------------------
-
-# # ----------------------------------------------------------
-# # physdt=1/20, Full likelihood, dataset(1.0-2.0), Normal(12,5) distri prior
-# # 25 points
-# ta = range(1.0, tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 6.0),
-#     param = [Normal(12, 5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1f1 = solve(prob, alg)
-# sol1f1.estimated_ode_params[1]
-# # 10.9818Particles{Float64, 1}
-# sol1f2 = solve(prob, alg)
-# sol1f2.estimated_ode_params[1]
-# # sol1f3 = solve(prob, alg)
-# # sol1f3.estimated_ode_params[1]
-
-# # 50 points
-# ta = range(1.0, tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 6.0),
-#     param = [Normal(12, 5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1f11 = solve(prob, alg)
-# sol1f11.estimated_ode_params[1]
-# sol1f22 = solve(prob, alg)
-# sol1f22.estimated_ode_params[1]
-# # sol1f33 = solve(prob, alg)
-# # sol1f33.estimated_ode_params[1]
-
-# # 100 points
-# ta = range(1.0, tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 6.0),
-#     param = [Normal(12, 5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1f111 = solve(prob, alg)
-# sol1f111.estimated_ode_params[1]
-# sol1f222 = solve(prob, alg)
-# sol1f222.estimated_ode_params[1]
-# # sol1f333 = solve(prob, alg)
-# # sol1f333.estimated_ode_params[1]
-
-# # ----------------------------------------------------------
-
-# # ----------------------------------------------------------
-# # physdt=1/20, partial likelihood, dataset(1.0-2.0), Normal(12,5) distri prior
-# # 25 points
-# ta = range(1.0, tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [Normal(12, 5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1f1_p = solve(prob, alg)
-# sol1f1_p.estimated_ode_params[1]
-# sol1f2_p = solve(prob, alg)
-# sol1f2_p.estimated_ode_params[1]
-# sol1f3_p = solve(prob, alg)
-# sol1f3_p.estimated_ode_params[1]
-
-# # 50 points
-# ta = range(1.0, tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [Normal(12, 5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1f11_p = solve(prob, alg)
-# sol1f11_p.estimated_ode_params[1]
-# sol1f22_p = solve(prob, alg)
-# sol1f22_p.estimated_ode_params[1]
-# sol1f33_p = solve(prob, alg)
-# sol1f33_p.estimated_ode_params[1]
-
-# # 100 points
-# ta = range(1.0, tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [Normal(12, 5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1f111_p = solve(prob, alg)
-# sol1f111_p.estimated_ode_params[1]
-# sol1f222_p = solve(prob, alg)
-# sol1f222_p.estimated_ode_params[1]
-# sol1f333_p = solve(prob, alg)
-# sol1f333_p.estimated_ode_params[1]
-
-# # ----------------------------------------------------------
-
-# plot!(title = "9,2.5 50 training 2>full,1>partial")
-
-# p
-# param1
-# # (lux chain)
-# @prob mean(abs.(physsol2 .- sol3lux_pestim.ensemblesol[1])) < 8e-2
-
-# # estimated parameters(lux chain)
-# param1 = sol3lux_pestim.estimated_ode_params[1]
-# @test abs(param1 - p) < abs(0.35 * p)
-
-# p
-# param1
-
-# # # my suggested Loss likelihood part
-# # #  + L2loss2(Tar, θ)
-# # # My suggested extra loss function
-# # function L2loss2(Tar::LogTargetDensity, θ)
-# #     f = Tar.prob.f
-
-# #     # parameter estimation chosen or not
-# #     if Tar.extraparams > 0
-# #         dataset = Tar.dataset
-
-# #         # Timepoints to enforce Physics
-# #         dataset = Array(reduce(hcat, dataset)')
-# #         t = dataset[end, :]
-# #         û = dataset[1:(end - 1), :]
-
-# #         ode_params = Tar.extraparams == 1 ?
-# #                      θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
-# #                      θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
-
-# #         if length(û[:, 1]) == 1
-# #             physsol = [f(û[:, i][1],
-# #                 ode_params,
-# #                 t[i])
-# #                        for i in 1:length(û[1, :])]
-# #         else
-# #             physsol = [f(û[:, i],
-# #                 ode_params,
-# #                 t[i])
-# #                        for i in 1:length(û[1, :])]
-# #         end
-# #         #form of NN output matrix output dim x n
-# #         deri_physsol = reduce(hcat, physsol)
-
-# #         #   OG deriv(basically gradient matching in case of an ODEFunction)
-# #         # in case of PDE or general ODE we would want to reduce residue of f(du,u,p,t)
-# #         # if length(û[:, 1]) == 1
-# #         #     deri_sol = [f(û[:, i][1],
-# #         #         Tar.prob.p,
-# #         #         t[i])
-# #         #                 for i in 1:length(û[1, :])]
-# #         # else
-# #         #     deri_sol = [f(û[:, i],
-# #         #         Tar.prob.p,
-# #         #         t[i])
-# #         #                 for i in 1:length(û[1, :])]
-# #         # end
-# #         # deri_sol = reduce(hcat, deri_sol)
-# #         derivatives = calculate_derivatives(Tar.dataset)
-# #         deri_sol = reduce(hcat, derivatives)
-
-# #         physlogprob = 0
-# #         for i in 1:length(Tar.prob.u0)
-# #             # can add phystd[i] for u[i]
-# #             physlogprob += logpdf(MvNormal(deri_physsol[i, :],
-# #                     LinearAlgebra.Diagonal(map(abs2,
-# #                         Tar.l2std[i] .*
-# #                         ones(length(deri_sol[i, :]))))),
-# #                 deri_sol[i, :])
-# #         end
-# #         return physlogprob
-# #     else
-# #         return 0
-# #     end
-# # end
-
-# # function calculate_derivatives(dataset)
-# #     x̂, time = dataset
-# #     num_points = length(x̂)
-
-# #     # Initialize an array to store the derivative values.
-# #     derivatives = similar(x̂)
-
-# #     for i in 2:(num_points - 1)
-# #         # Calculate the first-order derivative using central differences.
-# #         Δt_forward = time[i + 1] - time[i]
-# #         Δt_backward = time[i] - time[i - 1]
-
-# #         derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
-
-# #         derivatives[i] = derivative
-# #     end
-
-# #     # Derivatives at the endpoints can be calculated using forward or backward differences.
-# #     derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
-# #     derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
-
-# #     return derivatives
-# # end
-
-# size(dataset[1])
-# # Problem 1 with param estimation(flux,lux)
-# # Normal
-# # 6.20311 Particles{Float64, 1},6.21746Particles{Float64, 1}
-# # better
-# # 6.29093Particles{Float64, 1}, 6.27925Particles{Float64, 1}
-# # Non ideal case
-# # 6.14861Particles{Float64, 1}, 
-# sol2flux.estimated_ode_params
-# sol2lux.estimated_ode_params[1]
-# p
-# size(sol3flux_pestim.ensemblesol[2])
-# plott = sol3flux_pestim.ensemblesol[1]
-# using StatsPlots
-# plotly()
-# plot(t, sol3flux_pestim.ensemblesol[1])
-
-# function calculate_derivatives(dataset)
-#     x̂, time = dataset
-#     num_points = length(x̂)
-
-#     # Initialize an array to store the derivative values.
-#     derivatives = similar(x̂)
-
-#     for i in 2:(num_points - 1)
-#         # Calculate the first-order derivative using central differences.
-#         Δt_forward = time[i + 1] - time[i]
-#         Δt_backward = time[i] - time[i - 1]
-
-#         derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
-
-#         derivatives[i] = derivative
-#     end
-
-#     # Derivatives at the endpoints can be calculated using forward or backward differences.
-#     derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
-#     derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
-
-#     return derivatives
-# end
-
-# # Example usage:
-# # dataset = [x̂, time]
-# derivatives = calculate_derivatives(dataset)
-# dataset[1]
-# # Access derivative values at specific time points as needed.
-
-# # # 9,0.5
-# # 0.09894916260292887
-# # 0.09870335436072103
-# # 0.08398556878067913
-# # 0.10109070099105527
-# # 0.09122683737517055
-# # 0.08614958011892977
-# # mean(abs.(x̂ .- meanscurve1)) #0.017112298305523976
-# # mean(abs.(physsol1 .- meanscurve1)) #0.004038636894341354
-# # # 9,4(little worse)
-# # mean(abs.(x̂ .- meanscurve1))#0.01800876370000113
-# # mean(abs.(physsol1 .- meanscurve1))#0.007285681280600875
-# # # 30,30
-# # mean(abs.(x̂ .- meanscurve1)) #0.10599926120358046
-# # mean(abs.(physsol1 .- meanscurve1)) #0.10375554193397989
-# # # 30,0.5
-# # mean(abs.(x̂ .- meanscurve1)) #0.10160824458252521
-# # mean(abs.(physsol1 .- meanscurve1)) #0.09999942538357891
-
-# # # ------------------------------------------------normale
-# # # 9,0.5
-# # mean(abs.(x̂ .- meanscurve1)) #0.0333356493928835
-# # mean(abs.(physsol1 .- meanscurve1)) #0.02721733876400459
-# # # 9,4(little worse)
-# # mean(abs.(x̂ .- meanscurve1)) #0.020734206709433347
-# # mean(abs.(physsol1 .- meanscurve1)) #0.012502850740700212
-# # # 30,30
-# # mean(abs.(x̂ .- meanscurve1)) #0.10615859683094729
-# # mean(abs.(physsol1 .- meanscurve1)) #0.10508141153722575
-# # # 30,0.5
-# # mean(abs.(x̂ .- meanscurve1)) #0.10833514946031565
-# # mean(abs.(physsol1 .- meanscurve1)) #0.10668470203219232
-
-# # # 9,0.5
-# # 10.158108285475553
-# # 10.207234384538026
-# # 10.215000657664852
-# # 10.213817644016174
-# # 13.380030074088719
-# # 13.348906350967326
-
-# # 6.952731422892041
-
-# # # All losses
-# # 10.161478523326277
-# # # L2 losses 1
-# # 9.33312996960278
-# # # L2 losses 2
-# # 10.217417241370631
-
-# # mean([fhsamples1[i][26] for i in 500:1000]) #6.245045767509431
-# # p #6.283185307179586
-# # # 9,4
-# # mean([fhsamples1[i][23] for i in 500:1000]) #6.212522300650451
-# # # 30,30
-# # mean([fhsamples1[i][23] for i in 500:1000]) #35.328636809737695
-# # # 30,0.5
-# # mean([fhsamples1[i][23] for i in 500:1000]) #35.232963812125654
-
-# # # ---------------------------------------normale
-# # # 9,0.5
-# # mean([fhsamples1[i][23] for i in 500:1000]) #6.547771572198114
-# # p #6.283185307179586
-# # # 9,4
-# # mean([fhsamples1[i][23] for i in 500:1000]) #6.158906185002702
-# # # 30,30
-# # mean([fhsamples1[i][23] for i in 500:1000]) #29.210400972620185
-# # # 30,0.5
-# # mean([fhsamples1[i][23] for i in 500:1000]) #29.153845019454522
-
-# # # ----------------more dataset normale -----------------------------
-# # # 9,0.5
-# # mean([fhsamples1[i][23] for i in 500:1000]) #6.271141178216537
-# # p #6.283185307179586
-# # # 9,4
-# # mean([fhsamples1[i][23] for i in 500:1000]) #6.241144692919369
-# # # 30,30
-# # mean([fhsamples1[i][23] for i in 500:1000]) #29.124480447973127
-# # # 30,0.5
-# # mean([fhsamples1[i][23] for i in 500:1000]) #29.07838011629903
-
-# # # 9,0.5
-# # mean(abs.(x̂ .- meanscurve1)) #0.016551602015599295
-# # mean(abs.(physsol1 .- meanscurve1)) #0.0021488618484224245
-# # # 9,4(little worse)
-# # mean(abs.(x̂ .- meanscurve1)) #0.017022725082640747
-# # mean(abs.(physsol1 .- meanscurve1)) #0.004339761917100232
-# # # 30,30
-# # mean(abs.(x̂ .- meanscurve1)) #0.09668785317864312
-# # mean(abs.(physsol1 .- meanscurve1)) #0.09430712337543362
-# # # 30,0.5
-# # mean(abs.(x̂ .- meanscurve1)) #0.09958118358974392
-# # mean(abs.(physsol1 .- meanscurve1)) #0.09717454226368502
-
-# # # ----------------more dataset special -----------------------------
-# # # 9,0.5
-# # mean([fhsamples1[i][23] for i in 500:1000]) #6.284355334485365
-# # p #6.283185307179586
-# # # 9,4
-# # mean([fhsamples1[i][23] for i in 500:1000]) #6.259238106698602
-# # # 30,30
-# # mean([fhsamples1[i][23] for i in 500:1000]) #29.139808934336987
-# # # 30,0.5
-# # mean([fhsamples1[i][23] for i in 500:1000]) #29.03921327641226
-
-# # # 9,0.5
-# # mean(abs.(x̂ .- meanscurve1)) #0.016627231605546876
-# # mean(abs.(physsol1 .- meanscurve1)) #0.0020311429130039564
-# # # 9,4(little worse)
-# # mean(abs.(x̂ .- meanscurve1)) #0.016650324577507352
-# # mean(abs.(physsol1 .- meanscurve1)) #0.0027537543411154677
-# # # 30,30
-# # mean(abs.(x̂ .- meanscurve1)) #0.09713187937270151
-# # mean(abs.(physsol1 .- meanscurve1)) #0.09317278450371556
-# # # 30,0.5
-# # mean(abs.(x̂ .- meanscurve1)) #0.09550234866855814
-# # mean(abs.(physsol1 .- meanscurve1)) #0.09317278450371556
-
-# # using Plots, StatsPlots
-# # plotly()
-
-# # ---------------------------------------------------------
-# # # # Distribution abstract in wrapper, dataset Float64
-# # # 268.651 s (206393690 allocations: 388.71 GiB)
-# # # 318.170551 seconds (206.29 M allocations: 388.453 GiB, 20.83% gc time)
-
-# # # # Above with dataset Real subtype
-# # # 326.201 s (206327409 allocations: 388.42 GiB)
-# # # 363.189370 seconds (206.25 M allocations: 387.975 GiB, 15.77% gc time)
-# # # 306.171 s (206321277 allocations: 388.55 GiB)
-# # # 356.180699 seconds (206.43 M allocations: 388.361 GiB, 13.77% gc time)
-
-# # # # Above with dataset AbstractFloat subtype
-# # # 290.751187 seconds (205.94 M allocations: 387.955 GiB, 12.92% gc time)
-# # # 296.319815 seconds (206.38 M allocations: 388.730 GiB, 12.69% gc time)
-
-# # # # ODEProblem float64 dtaset and vector distri inside
-# # #   273.169 s (206128318 allocations: 388.40 GiB)
-# # #   274.059531 seconds (205.91 M allocations: 387.953 GiB, 12.77% gc time)
-
-# # # #   Dataset float64 inside and vector distri outsude
-# # #   333.603 s (206251143 allocations: 388.41 GiB)
-# # # 373.377222 seconds (206.11 M allocations: 387.968 GiB, 13.25% gc time)
-# # #   359.745 s (206348301 allocations: 388.41 GiB)
-# # # 357.813114 seconds (206.31 M allocations: 388.354 GiB, 13.54% gc time)
-
-# # # # Dataset float64 inside and vector distri inside
-# # #   326.437 s (206253571 allocations: 388.41 GiB)
-# # #   290.334083 seconds (205.92 M allocations: 387.954 GiB, 13.82% gc time)
-
-# # # # current setting
-# # # 451.304 s (206476927 allocations: 388.43 GiB)
-# # # 384.532732 seconds (206.22 M allocations: 387.976 GiB, 13.17% gc time)
-# # # 310.223 s (206332558 allocations: 388.63 GiB)
-# # # 344.243889 seconds (206.34 M allocations: 388.409 GiB, 13.84% gc time)
-# # # 357.457737 seconds (206.66 M allocations: 389.064 GiB, 18.16% gc time)
-
-# # # # shit setup
-# # #   325.595 s (206283732 allocations: 388.41 GiB)
-# # # 334.248753 seconds (206.06 M allocations: 387.964 GiB, 12.60% gc time)
-# # #   326.011 s (206370857 allocations: 388.56 GiB)
-# # # 327.203339 seconds (206.29 M allocations: 388.405 GiB, 12.92% gc time)
-
-# # # # in wrapper Distribution prior, insiade FLOAT64 DATASET
-# # # 325.158167 seconds (205.97 M allocations: 387.958 GiB, 15.07% gc time) 
-# # #   429.536 s (206476324 allocations: 388.43 GiB)
-# # #   527.364 s (206740343 allocations: 388.58 GiB)
-
-# # # #   wrapper Distribtuion, inside Float64
-# # # 326.017 s (206037971 allocations: 387.96 GiB)
-# # # 347.424730 seconds (206.45 M allocations: 388.532 GiB, 12.92% gc time)
-
-# # # 439.047568 seconds (284.24 M allocations: 392.598 GiB, 15.25% gc time, 14.36% compilation time: 0% of which was recompilation)
-# # # 375.472142 seconds (206.40 M allocations: 388.529 GiB, 14.93% gc time)
-# # # 374.888820 seconds (206.34 M allocations: 388.346 GiB, 14.09% gc time)
-# # # 363.719611 seconds (206.39 M allocations: 388.581 GiB, 15.08% gc time)
-# # # # inside Distribtion, instide Float64
-# # #   310.238 s (206324249 allocations: 388.53 GiB)
-# # #   308.991494 seconds (206.34 M allocations: 388.549 GiB, 14.01% gc time)
-# # #   337.442 s (206280712 allocations: 388.36 GiB)
-# # #   299.983096 seconds (206.29 M allocations: 388.512 GiB, 17.14% gc time)
-
-# # #   394.924357 seconds (206.27 M allocations: 388.337 GiB, 23.68% gc time)
-# # # 438.204179 seconds (206.39 M allocations: 388.470 GiB, 23.84% gc time)
-# # #   376.626914 seconds (206.46 M allocations: 388.693 GiB, 18.72% gc time)
-# # # 286.863795 seconds (206.14 M allocations: 388.370 GiB, 18.80% gc time)
-# # #   285.556929 seconds (206.22 M allocations: 388.371 GiB, 17.04% gc time)
-# # #   291.471662 seconds (205.96 M allocations: 388.068 GiB, 19.85% gc time)
-
-# # # 495.814341 seconds (284.62 M allocations: 392.622 GiB, 12.56% gc time, 10.96% compilation time: 0% of which was recompilation)
-# # # 361.530617 seconds (206.36 M allocations: 388.526 GiB, 14.98% gc time)
-# # # 348.576065 seconds (206.22 M allocations: 388.337 GiB, 15.01% gc time)
-# # # 374.575609 seconds (206.45 M allocations: 388.586 GiB, 14.65% gc time)
-# # # 314.223008 seconds (206.23 M allocations: 388.411 GiB, 14.63% gc time)
-
-# # PROBLEM-3 LOTKA VOLTERRA EXAMPLE [WIP] (WITH PARAMETER ESTIMATION)(will be put in tutorial page)
-# function lotka_volterra(u, p, t)
-#     # Model parameters.
-#     α, β, γ, δ = p
-#     # Current state.
-#     x, y = u
-
-#     # Evaluate differential equations.
-#     dx = (α - β * y) * x # prey
-#     dy = (δ * x - γ) * y # predator
-
-#     return [dx, dy]
-# end
-
-# u0 = [1.0, 1.0]
-# p = [1.5, 1.0, 3.0, 1.0]
-# tspan = (0.0, 6.0)
-# prob = ODEProblem(lotka_volterra, u0, tspan, p)
-# solution = solve(prob, Tsit5(); saveat = 0.05)
-
-# as = reduce(hcat, solution.u)
-# as[1, :]
-# # Plot simulation.
-# time = solution.t
-# u = hcat(solution.u...)
-# # BPINN AND TRAINING DATASET CREATION, NN create, Reconstruct
-# x = u[1, :] + 0.5 * randn(length(u[1, :]))
-# y = u[2, :] + 0.5 * randn(length(u[1, :]))
-# dataset = [x[1:50], y[1:50], time[1:50]]
-# # scatter!(time, [x, y])
-# # scatter!(dataset[3], [dataset[2], dataset[1]])
-
-# # NN has 2 outputs as u -> [dx,dy]
-# chainlux1 = Lux.Chain(Lux.Dense(1, 6, Lux.tanh), Lux.Dense(6, 6, Lux.tanh),
-#     Lux.Dense(6, 2))
-# chainflux1 = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 6, tanh), Flux.Dense(6, 2))
-
-# # fh_mcmc_chainflux1, fhsamplesflux1, fhstatsflux1 = ahmc_bayesian_pinn_ode(prob, chainflux1,
-# #                                                                           dataset = dataset,
-# #                                                                           draw_samples = 1000,
-# #                                                                           l2std = [
-# #                                                                               0.05,
-# #                                                                               0.05,
-# #                                                                           ],
-# #                                                                           phystd = [
-# #                                                                               0.05,
-# #                                                                               0.05,
-# #                                                                           ],
-# #                                                                           priorsNNw = (0.0,
-# #          
-
-# #   3.0))
-
-# # check if NN output is more than 1
-# # numoutput = size(luxar[1])[1]
-# # if numoutput > 1
-# #     # Initialize a vector to store the separated outputs for each output dimension
-# #     output_matrices = [Vector{Vector{Float32}}() for _ in 1:numoutput]
-
-# #     # Loop through each element in the `as` vector
-# #     for element in as
-# #         for i in 1:numoutput
-# #             push!(output_matrices[i], element[i, :])  # Append the i-th output (i-th row) to the i-th output_matrices
-# #         end
-# #     end
-
-# #     ensemblecurves = Vector{}[]
-# #     for r in 1:numoutput
-# #         br = hcat(output_matrices[r]...)'
-# #         ensemblecurve = prob.u0[r] .+
-# #                         [Particles(br[:, i]) for i in 1:length(t)] .*
-# #                         (t .- prob.tspan[1])
-# #         push!(ensemblecurves, ensemblecurve)
-# #     end
-
-# # else
-# #     # ensemblecurve = prob.u0 .+
-# #     #                 [Particles(reduce(vcat, luxar)[:, i]) for i in 1:length(t)] .*
-# #     #                 (t .- prob.tspan[1])
-# #     print("yuh")
-# # end
-
-# # fhsamplesflux2
-# # nnparams = length(init1)
-# # estimnnparams = [Particles(reduce(hcat, fhsamplesflux2)[i, :]) for i in 1:nnparams]
-# # ninv=4
-# # estimated_params = [Particles(reduce(hcat, fhsamplesflux2[(end - ninv + 1):end])[i, :])
-# #                     for i in (nnparams + 1):(nnparams + ninv)]
-# # output_matrices[r]
-# # br = hcat(output_matrices[r]...)'
-
-# # br[:, 1]
-
-# # [Particles(br[:, i]) for i in 1:length(t)]
-# # prob.u0
-# # [Particles(br[:, i]) for i in 1:length(t)] .*
-# # (t .- prob.tspan[1])
-
-# # ensemblecurve = prob.u0[r] .+
-# #                 [Particles(br[:, i]) for i in 1:length(t)] .*
-# #                 (t .- prob.tspan[1])
-# # push!(ensemblecurves, ensemblecurve)
-
-# using StatsPlots
-# plotly()
-# plot(t, ensemblecurve)
-# plot(t, ensemblecurves[1])
-# plot!(t, ensemblecurves[2])
-# ensemblecurve
-# ensemblecurves[1]
-# fh_mcmc_chainflux2, fhsamplesflux2, fhstatsflux2 = ahmc_bayesian_pinn_ode(prob, chainflux1,
-#     dataset = dataset,
-#     draw_samples = 1000,
-#     l2std = [
-#         0.05,
-#         0.05,
-#     ],
-#     phystd = [
-#         0.05,
-#         0.05,
-#     ],
-#     priorsNNw = (0.0,
-#         3.0),
-#     param = [
-#         Normal(1.5,
-#             0.5),
-#         Normal(1.2,
-#             0.5),
-#         Normal(3.3,
-#             0.5),
-#         Normal(1.4,
-#             0.5),
-#     ], progress = true)
-
-# alg = NeuralPDE.BNNODE(chainflux1,
-#     dataset = dataset,
-#     draw_samples = 1000,
-#     l2std = [
-#         0.05,
-#         0.05,
-#     ],
-#     phystd = [
-#         0.05,
-#         0.05,
-#     ],
-#     priorsNNw = (0.0,
-#         3.0),
-#     param = [
-#         Normal(4.5,
-#             5),
-#         Normal(7,
-#             2),
-#         Normal(5,
-#             2),
-#         Normal(-4,
-#             6),
-#     ],
-#     n_leapfrog = 30, progress = true)
-
-# sol3flux_pestim = solve(prob, alg)
-
-# # OG PARAM VALUES
-# [1.5, 1.0, 3.0, 1.0]
-# # less
-# # [1.34, 7.51, 2.54, -2.55]
-# # better
-# # [1.48, 0.993, 2.77, 0.954]
-
-# sol3flux_pestim.es
-# sol3flux_pestim.estimated_ode_params
-# # fh_mcmc_chainlux1, fhsampleslux1, fhstatslux1 = ahmc_bayesian_pinn_ode(prob, chainlux1,
-# #                                                                        dataset = dataset,
-# #                                                                        draw_samples = 1000,
-# #                                                                        l2std = [0.05, 0.05],
-# #                                                                        phystd = [
-# #                                                                            0.05,
-# #                                                                            0.05,
-# #                                                                        ],
-# #                                                                        priorsNNw = (0.0,
-# #                                                                                     3.0))
-
-# # fh_mcmc_chainlux2, fhsampleslux2, fhstatslux2 = ahmc_bayesian_pinn_ode(prob, chainlux1,
-# #                                                                        dataset = dataset,
-# #                                                                        draw_samples = 1000,
-# #                                                                        l2std = [0.05, 0.05],
-# #                                                                        phystd = [
-# #                                                                            0.05,
-# #                                                                            0.05,
-# #                                                                        ],
-# #                                                                        priorsNNw = (0.0,
-# #                                                                                     3.0),
-# #                                                                        param = [
-# #                                                                            Normal(1.5, 0.5),
-# #                                                                            Normal(1.2, 0.5),
-# #                                                                            Normal(3.3, 0.5),
-# #                                                                            Normal(1.4, 0.5),
-# #                                                                        ])
-
-# init1, re1 = destructure(chainflux1)
-# θinit, st = Lux.setup(Random.default_rng(), chainlux1)
-# #   PLOT testing points
-# t = time
-# p = prob.p
-# collect(Float64, vcat(ComponentArrays.ComponentArray(θinit)))
-# collect(Float64, ComponentArrays.ComponentArray(θinit))
-# # Mean of last 1000 sampled parameter's curves(flux and lux chains)[Ensemble predictions]
-# out = re1.([fhsamplesflux1[i][1:68] for i in 500:1000])
-# yu = [out[i](t') for i in eachindex(out)]
-
-# function getensemble(yu, num_models)
-#     num_rows, num_cols = size(yu[1])
-#     row_means = zeros(Float32, num_rows, num_cols)
-#     for i in 1:num_models
-#         row_means .+= yu[i]
-#     end
-#     row_means ./ num_models
-# end
-# fluxmean = getensemble(yu, length(out))
-# meanscurve1_1 = prob.u0 .+ (t' .- prob.tspan[1]) .* fluxmean
-# mean(abs.(u .- meanscurve1_1))
-
-# plot!(t, physsol1)
-# @test mean(abs2.(x̂ .- meanscurve1_1)) < 2e-2
-# @test mean(abs2.(physsol1 .- meanscurve1)) < 2e-2
-# @test mean(abs2.(x̂ .- meanscurve2)) < 3e-3
-# @test mean(abs2.(physsol1 .- meanscurve2)) < 2e-3
-
-# out = re1.([fhsamplesflux2[i][1:68] for i in 500:1000])
-# yu = collect(out[i](t') for i in eachindex(out))
-# fluxmean = getensemble(yu, length(out))
-# meanscurve1_2 = prob.u0 .+ (t' .- prob.tspan[1]) .* fluxmean
-# mean(abs.(u .- meanscurve1_2))
-
-# @test mean(abs2.(x̂ .- meanscurve1)) < 2e-2
-# @test mean(abs2.(physsol1 .- meanscurve1)) < 2e-2
-# @test mean(abs2.(x̂ .- meanscurve2)) < 3e-3
-# @test mean(abs2.(physsol1 .- meanscurve2)) < 2e-3
-
-# θ = [vector_to_parameters(fhsampleslux1[i][1:(end - 4)], θinit) for i in 500:1000]
-# luxar = [chainlux1(t', θ[i], st)[1] for i in 1:500]
-# luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
-# meanscurve2_1 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
-
-# @test mean(abs2.(x̂ .- meanscurve1)) < 2e-2
-# @test mean(abs2.(physsol1 .- meanscurve1)) < 2e-2
-# @test mean(abs2.(x̂ .- meanscurve2)) < 3e-3
-# @test mean(abs2.(physsol1 .- meanscurve2)) < 2e-3
-
-# θ = [vector_to_parameters(fhsampleslux2[i][1:(end - 4)], θinit) for i in 500:1000]
-# luxar = [chainlux1(t', θ[i], st)[1] for i in 1:500]
-# luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
-# meanscurve2_2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
-
-# @test mean(abs2.(x̂ .- meanscurve1)) < 2e-2
-# @test mean(abs2.(physsol1 .- meanscurve1)) < 2e-2
-# @test mean(abs2.(x̂ .- meanscurve2)) < 3e-3
-# @test mean(abs2.(physsol1 .- meanscurve2)) < 2e-3
-
-# # # ESTIMATED ODE PARAMETERS (NN1 AND NN2)
-# @test abs(p - mean([fhsamplesflux2[i][69] for i in 500:1000])) < 0.1 * p[1]
-# @test abs(p - mean([fhsampleslux2[i][69] for i in 500:1000])) < 0.2 * p[1]
-
-# # @test abs(p - mean([fhsamplesflux2[i][70] for i in 500:1000])) < 0.1 * p[2]
-# # @test abs(p - mean([fhsampleslux2[i][70] for i in 500:1000])) < 0.2 * p[2]
-
-# # @test abs(p - mean([fhsamplesflux2[i][71] for i in 500:1000])) < 0.1 * p[3]
-# # @test abs(p - mean([fhsampleslux2[i][71] for i in 500:1000])) < 0.2 * p[3]
-
-# # @test abs(p - mean([fhsamplesflux2[i][72] for i in 500:1000])) < 0.1 * p[4]
-# # @test abs(p - mean([fhsampleslux2[i][72] for i in 500:1000])) < 0.2 * p[4]
-
-# # fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chainlux1,
-# #                                                               dataset = dataset,
-# #                                                               draw_samples = 1000,
-# #                                                               l2std = [0.05, 0.05],
-# #                                                               phystd = [0.05, 0.05],
-# #                                                               priorsNNw = (0.0, 3.0),
-# #                                                               param = [
-# #                                                                   Normal(1.5, 0.5),
-# #                                                                   Normal(1.2, 0.5),
-# #                                                                   Normal(3.3, 0.5),
-# #                                                                   Normal(1.4, 0.5),
-# #                                                               ], autodiff = true)
-
-# # fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chainlux1,
-# #                                                               dataset = dataset,
-# #                                                               draw_samples = 1000,
-# #                                                               l2std = [0.05, 0.05],
-# #                                                               phystd = [0.05, 0.05],
-# #                                                               priorsNNw = (0.0, 3.0),
-# #                                                               param = [
-# #                                                                   Normal(1.5, 0.5),
-# #                                                                   Normal(1.2, 0.5),
-# #                                                                   Normal(3.3, 0.5),
-# #                                                                   Normal(1.4, 0.5),
-# #                                                               ], nchains = 2)
-
-# # NOTES (WILL CLEAR LATER)
-# # --------------------------------------------------------------------------------------------
-# # Hamiltonian energy must be lowest(more paramters the better is it to map onto them)
-# # full better than L2 and phy individual(test)
-# # in mergephys more points after training points is better from 20->40
-# # does consecutive runs bceome better? why?(plot 172)(same chain maybe)
-# # does density of points in timespan matter dataset vs internal timespan?(plot 172)(100+0.01)
-# # when training from 0-1 and phys from 1-5 with 1/150 simple nn slow,but bigger nn faster decrease in Hmailtonian
-# # bigger time interval more curves to adapt to only more parameters adapt to that, better NN architecture
-# # higher order logproblems solve better
-# # repl up up are same instances? but reexecute calls are new?
-
-# #Compare results against paper example
-# # Lux chains support (DONE)
-# # fix predictions for odes depending upon 1,p in f(u,p,t)(DONE)
-# # lotka volterra learn curve beyond l2 losses(L2 losses determine accuracy of parameters)(parameters cant run free ∴ L2 interval only)
-# # check if prameters estimation works(YES)
-# # lotka volterra parameters estimate (DONE)
-
-# using NeuralPDE, Lux, Flux, Optimization, OptimizationOptimJL
-# import ModelingToolkit: Interval
-# using Plots, StatsPlots
-# plotly()
-# # Profile.init()
-
-# @parameters x y
-# @variables u(..)
-# Dxx = Differential(x)^2
-# Dyy = Differential(y)^2
-
-# # 2D PDE
-# eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y)
-
-# # Boundary conditions
-# bcs = [u(0, y) ~ 0.0, u(1, y) ~ 0.0,
-#     u(x, 0) ~ 0.0, u(x, 1) ~ 0.0]
-# # Space and time domains
-# domains = [x ∈ Interval(0.0, 1.0),
-#     y ∈ Interval(0.0, 1.0)]
-
-# # Neural network
-# dim = 2 # number of dimensions
-# chain = Flux.Chain(Flux.Dense(dim, 16, Lux.σ), Flux.Dense(16, 16, Lux.σ), Flux.Dense(16, 1))
-# θ, re = destructure(chain)
-# # Discretization
-# dx = 0.05
-# discretization = PhysicsInformedNN(chain, GridTraining(dx))
-
-# @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
-
-# pinnrep = symbolic_discretize(pde_system, discretization)
-# typeof(pinnrep.phi)
-# typeof(pinnrep.phi)
-# typeof(re)
-# pinnrep.phi([1, 2], θ)
-
-# typeof(θ)
-
-# print(pinnrep)
-# pinnrep.eqs
-# pinnrep.bcs
-# pinnrep.domains
-# pinnrep.eq_params
-# pinnrep.defaults
-# print(pinnrep.default_p)
-# pinnrep.param_estim
-# print(pinnrep.additional_loss)
-# pinnrep.adaloss
-# pinnrep.depvars
-# pinnrep.indvars
-# pinnrep.dict_depvar_input
-# pinnrep.dict_depvars
-# pinnrep.dict_indvars
-# print(pinnrep.logger)
-# pinnrep.multioutput
-# pinnrep.iteration
-# pinnrep.init_params
-# pinnrep.flat_init_params
-# pinnrep.phi
-# pinnrep.derivative
-# pinnrep.strategy
-# pinnrep.pde_indvars
-# pinnrep.bc_indvars
-# pinnrep.pde_integration_vars
-# pinnrep.bc_integration_vars
-# pinnrep.integral
-# pinnrep.symbolic_pde_loss_functions
-# pinnrep.symbolic_bc_loss_functions
-# pinnrep.loss_functions
-
-# #  = discretize(pde_system, discretization)
-# prob = symbolic_discretize(pde_system, discretization)
-# # "The boundary condition loss functions"
-# sum([prob.loss_functions.bc_loss_functions[i](θ) for i in eachindex(1:4)])
-# sum([prob.loss_functions.pde_loss_functions[i](θ) for i in eachindex(1)])
-
-# prob.loss_functions.full_loss_function(θ, 32)
-
-# prob.loss_functions.bc_loss_functions[1](θ)
-
-# prob.loss_functions.bc_loss_functions
-# prob.loss_functions.full_loss_function
-# prob.loss_functions.additional_loss_function
-# prob.loss_functions.pde_loss_functions
-
-# 1.3953060473003345 + 1.378102161087438 + 1.395376727128639 + 1.3783868705075002 +
-# 0.22674532775196876
-# # "The PDE loss functions"
-# prob.loss_functions.pde_loss_functions
-# prob.loss_functions.pde_loss_functions[1](θ)
-# # "The full loss function, combining the PDE and boundary condition loss functions.This is the loss function that is used by the optimizer."
-# prob.loss_functions.full_loss_function(θ, nothing)
-# prob.loss_functions.full_loss_function(θ, 423423)
-
-# # "The wrapped `additional_loss`, as pieced together for the optimizer."
-# prob.loss_functions.additional_loss_function
-# # "The pre-data version of the PDE loss function"
-# prob.loss_functions.datafree_pde_loss_functions
-# # "The pre-data version of the BC loss function"
-# prob.loss_functions.datafree_bc_loss_functions
-
-# using Random
-# θ, st = Lux.setup(Random.default_rng(), chain)
-# #Optimizer
-# opt = OptimizationOptimJL.BFGS()
-
-# #Callback function
-# callback = function (p, l)
-#     println("Current loss is: $l")
-#     return false
-# end
-
-# res = Optimization.solve(prob, opt, callback = callback, maxiters = 1000)
-# phi = discretization.phi
-
-# # ------------------------------------------------
-# using NeuralPDE, Lux, ModelingToolkit, Optimization, OptimizationOptimJL, OrdinaryDiffEq,
-#       Plots
-# import ModelingToolkit: Interval, infimum, supremum
-# @parameters t, σ_, β, ρ
-# @variables x(..), y(..), z(..)
-# Dt = Differential(t)
-# eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
-#     Dt(y(t)) ~ x(t) * (ρ - z(t)) - y(t),
-#     Dt(z(t)) ~ x(t) * y(t) - β * z(t)]
-
-# bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
-# domains = [t ∈ Interval(0.0, 1.0)]
-# dt = 0.01
-
-# input_ = length(domains)
-# n = 8
-# chain1 = Lux.Chain(Lux.Dense(input_, n, Lux.σ), Lux.Dense(n, n, Lux.σ),
-#                    Lux.Dense(n, n, Lux.σ),
-#                    Lux.Dense(n, 1))
-# chain2 = Lux.Chain(Lux.Dense(input_, n, Lux.σ), Lux.Dense(n, n, Lux.σ),
-#                    Lux.Dense(n, n, Lux.σ),
-#                    Lux.Dense(n, 1))
-# chain3 = Lux.Chain(Lux.Dense(input_, n, Lux.σ), Lux.Dense(n, n, Lux.σ),
-#                    Lux.Dense(n, n, Lux.σ),
-#                    Lux.Dense(n, 1))
-
-# function lorenz!(du, u, p, t)
-#     du[1] = 10.0 * (u[2] - u[1])
-#     du[2] = u[1] * (28.0 - u[3]) - u[2]
-#     du[3] = u[1] * u[2] - (8 / 3) * u[3]
-# end
-
-# u0 = [1.0; 0.0; 0.0]
-# tspan = (0.0, 1.0)
-# prob = ODEProblem(lorenz!, u0, tspan)
-# sol = solve(prob, Tsit5(), dt = 0.1)
-# ts = [infimum(d.domain):dt:supremum(d.domain) for d in domains][1]
-# function getData(sol)
-#     data = []
-#     us = hcat(sol(ts).u...)
-#     ts_ = hcat(sol(ts).t...)
-#     return [us, ts_]
-# end
-# data = getData(sol)
-
-# (u_, t_) = data
-# len = length(data[2])
-
-# depvars = [:x, :y, :z]
-# function additional_loss(phi, θ, p)
-#     return sum(sum(abs2, phi[i](t_, θ[depvars[i]]) .- u_[[i], :]) / len for i in 1:1:3)
-# end
-
-# discretization = NeuralPDE.PhysicsInformedNN([chain1, chain2, chain3],
-#                                              NeuralPDE.GridTraining(dt),
-#                                              param_estim = false,
-#                                              additional_loss = additional_loss)
-# @named pde_system = PDESystem(eqs, bcs, domains, [t], [x(t), y(t), z(t)], [σ_, ρ, β],
-#                               defaults = Dict([p .=> 1.0 for p in [σ_, ρ, β]]))
-# prob = NeuralPDE.discretize(pde_system, discretization)
-# callback = function (p, l)
-#     println("Current loss is: $l")
-#     return false
-# end
-# res = Optimization.solve(prob, BFGS(); callback = callback, maxiters = 5000)
-# p_ = res.u[(end - 2):end] # p_ = [9.93, 28.002, 2.667]
-
-# minimizers = [res.u.depvar[depvars[i]] for i in 1:3]
-# ts = [infimum(d.domain):(dt / 10):supremum(d.domain) for d in domains][1]
-# u_predict = [[discretization.phi[i]([t], minimizers[i])[1] for t in ts] for i in 1:3]
-# plot(sol)
-# plot!(ts, u_predict, label = ["x(t)" "y(t)" "z(t)"])
-
-# discretization.multioutput
-# discretization.chain
-# discretization.strategy
-# discretization.init_params
-# discretization.phi
-# discretization.derivative
-# discretization.param_estim
-# discretization.additional_loss
-# discretization.adaptive_loss
-# discretization.logger
-# discretization.log_options
-# discretization.iteration
-# discretization.self_increment
-# discretization.multioutput
-# discretization.kwargs
-
-# struct BNNODE1{P <: Vector{<:Distribution}}
-#     chain::Any
-#     Kernel::Any
-#     draw_samples::UInt32
-#     priorsNNw::Tuple{Float64, Float64}
-#     param::P
-#     l2std::Vector{Float64}
-#     phystd::Vector{Float64}
-
-#     function BNNODE1(chain, Kernel; draw_samples = 2000, priorsNNw = (0.0, 3.0), param = [],
-#                      l2std = [0.05], phystd = [0.05])
-#         BNNODE1(chain, Kernel, draw_samples, priorsNNw, param, l2std, phystd)
-#     end
-# end
-
-# struct BNNODE3{C, K, P <: Union{Any, Vector{<:Distribution}}}
-#     chain::C
-#     Kernel::K
-#     draw_samples::UInt32
-#     priorsNNw::Tuple{Float64, Float64}
-#     param::P
-#     l2std::Vector{Float64}
-#     phystd::Vector{Float64}
-
-#     function BNNODE3(chain, Kernel; draw_samples = 2000, priorsNNw = (0.0, 3.0), param = [],
-#                      l2std = [0.05], phystd = [0.05])
-#         new{typeof(chain), typeof(Kernel), typeof(param)}(chain, Kernel, draw_samples,
-#                                                           priorsNNw, param, l2std, phystd)
-#     end
-# end
-# linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π)
-# linear = (u, p, t) -> cos(2 * π * t)
-# tspan = (0.0, 2.0)
-# u0 = 0.0
-# prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan)
-
-# ta = range(tspan[1], tspan[2], length = 300)
-# u = [linear_analytic(u0, nothing, ti) for ti in ta]
-# sol1 = solve(prob, Tsit5())
-
-# # BPINN AND TRAINING DATASET CREATION, NN create, Reconstruct
-# x̂ = collect(Float64, Array(u) + 0.02 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂[1:100], time[1:100]]
-
-# # Call BPINN, create chain
-# chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1))
-# chainlux = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1))
-# HMC
-# solve(prob, BNNODE(chainflux, HMC))
-# BNNODE1(chainflux, HMC, 2000)
-
-# draw_samples = 2000
-# priorsNNw = (0.0, 3.0)
-# param = []
-# l2std = [0.05]
-# phystd = [0.05]
-# @time BNNODE3(chainflux, HMC, draw_samples = 2000, priorsNNw = (0.0, 3.0),
-#               param = [nothing],
-#               l2std = [0.05], phystd = [0.05])
-# typeof(Nothing) <: Vector{<:Distribution}
-# Nothing <: Distribution
-# {UnionAll} <: Distribution
-# @time [Nothing]
-# typeof([Nothing])
-# @time [1]
-
-# function test1(sum; c = 23, d = 32)
-#     return sum + c + d
-# end
-# function test(a, b; c, d)
-#     return test1(a + b, c, d)
-# end
-
-# test(2, 2)
-
-# struct BNNODE3{C, K, P <: Union{Vector{Nothing}, Vector{<:Distribution}}}
-#     chain::C
-#     Kernel::K
-#     draw_samples::Int64
-#     priorsNNw::Tuple{Float64, Float64}
-#     param::P
-#     l2std::Vector{Float64}
-#     phystd::Vector{Float64}
-
-#     function BNNODE3(chain, Kernel; draw_samples,
-#                      priorsNNw, param = [nothing], l2std, phystd)
-#         new{typeof(chain), typeof(Kernel), typeof(param)}(chain,
-#                                                           Kernel,
-#                                                           draw_samples,
-#                                                           priorsNNw,
-#                                                           param, l2std,
-#                                                           phystd)
-#     end
-# end
-
-# function solve1(prob::DiffEqBase.AbstractODEProblem, alg::BNNODE3;
-#                 dataset = [nothing], dt = 1 / 20.0,
-#                 init_params = nothing, nchains = 1,
-#                 autodiff = false, Integrator = Leapfrog,
-#                 Adaptor = StanHMCAdaptor, targetacceptancerate = 0.8,
-#                 Metric = DiagEuclideanMetric, jitter_rate = 3.0,
-#                 tempering_rate = 3.0, max_depth = 10, Δ_max = 1000,
-#                 n_leapfrog = 10, δ = 0.65, λ = 0.3, progress = true,
-#                 verbose = false)
-#     chain = alg.chain
-#     l2std = alg.l2std
-#     phystd = alg.phystd
-#     priorsNNw = alg.priorsNNw
-#     Kernel = alg.Kernel
-#     draw_samples = alg.draw_samples
-
-#     param = alg.param == [nothing] ? [] : alg.param
-#     mcmcchain, samples, statistics = ahmc_bayesian_pinn_ode(prob, chain, dataset = dataset,
-#                                                             draw_samples = draw_samples,
-#                                                             init_params = init_params,
-#                                                             physdt = dt, l2std = l2std,
-#                                                             phystd = phystd,
-#                                                             priorsNNw = priorsNNw,
-#                                                             param = param,
-#                                                             nchains = nchains,
-#                                                             autodiff = autodiff,
-#                                                             Kernel = Kernel,
-#                                                             Integrator = Integrator,
-#                                                             Adaptor = Adaptor,
-#                                                             targetacceptancerate = targetacceptancerate,
-#                                                             Metric = Metric,
-#                                                             jitter_rate = jitter_rate,
-#                                                             tempering_rate = tempering_rate,
-#                                                             max_depth = max_depth,
-#                                                             Δ_max = Δ_max,
-#                                                             n_leapfrog = n_leapfrog, δ = δ,
-#                                                             λ = λ, progress = progress,
-#                                                             verbose = verbose)
-# end
-
-# linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π)
-# linear = (u, p, t) -> cos(2 * π * t)
-# tspan = (0.0, 2.0)
-# u0 = 0.0
-# prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan)
-
-# ta = range(tspan[1], tspan[2], length = 300)
-# u = [linear_analytic(u0, nothing, ti) for ti in ta]
-# # sol1 = solve(prob, Tsit5())
-
-# # BPINN AND TRAINING DATASET CREATION, NN create, Reconstruct
-# x̂ = collect(Float64, Array(u) + 0.02 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂[1:100], time[1:100]]
-
-# # Call BPINN, create chain
-# chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1))
-# chainlux = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1))
-# HMC
-
-# solve1(prob, a)
-# a = BNNODE3(chainflux, HMC, draw_samples = 2000,
-#             priorsNNw = (0.0, 3.0),
-#             l2std = [0.05], phystd = [0.05])
-
-# Define Lotka-Volterra model.
-function lotka_volterra1(u, p, t)
-    # Model parameters.
-    α, β, γ, δ = p
-    # Current state.
-    x, y = u
-
-    # Evaluate differential equations.
-    dx = (α - β * y) * x # prey
-    dy = (δ * x - γ) * y # predator
-
-    return [dx, dy]
-end
-
-u0 = [1.0, 1.0]
-p = [1.5, 1.0, 3.0, 1.0]
-tspan = (0.0, 6.0)
-prob = ODEProblem(lotka_volterra1, u0, tspan, p)
-solution = solve(prob, Tsit5(); saveat = 0.05)
-
-as = reduce(hcat, solution.u)
-as[1, :]
-# Plot simulation.
-time = solution.t
-u = hcat(solution.u...)
-# BPINN AND TRAINING DATASET CREATION, NN create, Reconstruct
-x = u[1, :] + 0.5 * randn(length(u[1, :]))
-y = u[2, :] + 0.5 * randn(length(u[1, :]))
-dataset = [x[1:50], y[1:50], time[1:50]]
-# scatter!(time, [x, y])
-# scatter!(dataset[3], [dataset[2], dataset[1]])
-
-# NN has 2 outputs as u -> [dx,dy]
-chainlux1 = Lux.Chain(Lux.Dense(1, 6, Lux.tanh), Lux.Dense(6, 6, Lux.tanh),
-    Lux.Dense(6, 2))
-chainflux1 = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 6, tanh), Flux.Dense(6, 2))
-
-fh_mcmc_chainflux1, fhsamplesflux1, fhstatsflux1 = ahmc_bayesian_pinn_ode(prob, chainflux1,
-    dataset = dataset,
-    draw_samples = 1000,
-    l2std = [
-        0.05,
-        0.05,
-    ],
-    phystd = [
-        0.05,
-        0.05,
-    ],
-    priorsNNw = (0.0, 3.0), progress = true)
-ahmc_bayesian_pinn_ode(prob, chainflux1,
-    dataset = dataset,
-    draw_samples = 1000,
-    l2std = [
-        0.05,
-        0.05,
-    ],
-    phystd = [
-        0.05,
-        0.05,
-    ],
-    priorsNNw = (0.0, 3.0), progress = true)
-
-#     2×171 Matrix{Float64}:
-#  -0.5  -0.518956  -0.529639  …  -1.00266  -1.01049
-#   2.0   1.97109    1.92747       0.42619   0.396335
-
-#     2-element Vector{Float64}:
-#  -119451.94949911036
-#  -128543.23714618056
-
-# alg = NeuralPDE.BNNODE(chainflux1,
-#     dataset = dataset,
-#     draw_samples = 1000,
-#     l2std = [
-#         0.05,
-#         0.05,
-#     ],
-#     phystd = [
-#         0.05,
-#         0.05,
-#     ],
-#     priorsNNw = (0.0,
-#         3.0),
-#     param = [
-#         Normal(4.5,
-#             5),
-#         Normal(7,
-#             2),
-#         Normal(5,
-#             2),
-#         Normal(-4,
-#             6),
-#     ],
-#     n_leapfrog = 30, progress = true)
-
-# sol3flux_pestim = solve(prob, alg)
-
-#  ----------------------------------------------
-# original paper implementation
-# 25 points 
-run1  #7.70593 Particles{Float64, 1}
-run2 #6.66347 Particles{Float64, 1} 
-run3 #6.84827 Particles{Float64, 1} 
-
-# 50 points 
-run1 #7.83577 Particles{Float64, 1}
-run2 #6.49477 Particles{Float64, 1}
-run3 #6.47421 Particles{Float64, 1}
-
-# 100 points 
-run1 #5.96604 Particles{Float64, 1}
-run2 #6.05432 Particles{Float64, 1}
-run3 #6.08856 Particles{Float64, 1}
-
-# Full likelihood(uses total variation regularized differentiation) 
-# 25 points 
-run1 #6.41722 Particles{Float64, 1}
-run2 #6.42782 Particles{Float64, 1}
-run3 #6.42782 Particles{Float64, 1}
-
-# 50 points
-run1 #5.71268 Particles{Float64, 1}
-run2 #5.74599 Particles{Float64, 1}
-run3 #5.74599 Particles{Float64, 1}
-
-# 100 points  
-run1 #6.59097 Particles{Float64, 1}
-run2 #6.62813 Particles{Float64, 1}
-run3 #6.62813 Particles{Float64, 1}
-
-using Plots, StatsPlots
-function lotka_volterra(u, p, t)
-    # Model parameters.
-    α, β, γ, δ = p
-    # Current state.
-    x, y = u
-
-    # Evaluate differential equations.
-    dx = (α - β * y) * x # prey
-    dy = (δ * x - γ) * y # predator
-
-    return [dx, dy]
-end
-
-# initial-value problem.
-u0 = [1.0, 1.0]
-p = [1.5, 1.0, 3.0, 1.0]
-tspan = (0.0, 6.0)
-prob = ODEProblem(lotka_volterra, u0, tspan, p)
-
-# Plot simulation.
-
-solution = solve(prob, Tsit5(); saveat = 0.05)
-plot(solve(prob, Tsit5()))
-
-# Dataset creation for parameter estimation
-time = solution.t
-u = hcat(solution.u...)
-x = u[1, :] + 0.5 * randn(length(u[1, :]))
-y = u[2, :] + 0.5 * randn(length(u[1, :]))
-dataset = [x, y, time]
-
-# Neural Networks must have 2 outputs as u -> [dx,dy] in function lotka_volterra()
-chainflux = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 6, tanh), Flux.Dense(6, 2)) |>
-            Flux.f64
-
-chainlux = Lux.Chain(Lux.Dense(1, 6, Lux.tanh), Lux.Dense(6, 6, Lux.tanh), Lux.Dense(6, 2))
-
-alg1 = NeuralPDE.BNNODE(chainflux,
-    dataset = dataset,
-    draw_samples = 1000,
-    l2std = [
-        0.01,
-        0.01,
-    ],
-    phystd = [
-        0.01,
-        0.01,
-    ],
-    priorsNNw = (0.0,
-        3.0),
-    param = [
-        LogNormal(1.5,
-            0.5),
-        LogNormal(1.2,
-            0.5),
-        LogNormal(3.3,
-            1),
-        LogNormal(1.4,
-            1)],
-    n_leapfrog = 30, progress = true)
-
-sol_flux_pestim = solve(prob, alg1)
-
-# Dataset not needed as we are solving the equation with ideal parameters
-alg2 = NeuralPDE.BNNODE(chainlux,
-    draw_samples = 1000,
-    l2std = [
-        0.05,
-        0.05,
-    ],
-    phystd = [
-        0.05,
-        0.05,
-    ],
-    priorsNNw = (0.0,
-        3.0),
-    n_leapfrog = 30, progress = true)
-
-sol_lux = solve(prob, alg2)
-
-#testing timepoints must match keyword arg `saveat`` timepoints of solve() call
-t = collect(Float64, prob.tspan[1]:(1 / 50.0):prob.tspan[2])
-
-# plotting solution for x,y for chain_flux
-plot(t, sol_flux_pestim.ensemblesol[1])
-plot!(t, sol_flux_pestim.ensemblesol[2])
-
-plot(sol_flux_pestim.ens1mblesol[1])
-plot!(sol_flux_pestim.ensemblesol[2])
-
-# estimated ODE parameters by .estimated_ode_params, weights and biases by .estimated_nn_params
-sol_flux_pestim.estimated_nn_params
-sol_flux_pestim.estimated_ode_params
-
-# plotting solution for x,y for chain_lux
-plot(t, sol_lux.ensemblesol[1])
-plot!(t, sol_lux.ensemblesol[2])
-
-# estimated weights and biases by .estimated_nn_params for chain_lux
-sol_lux.estimated_nn_params
-
-# # ----------------------------------stats-----------------------------
-# #   ----------------------------
-# # -----------------------------
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2loss2 Logpdf is : -3118.0639515039957
-# Sampling 100%|███████████████████████████████| Time: 0:04:47 
-
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2loss2 Logpdf is : -3118.0639515039957
-# Sampling 100%|███████████████████████████████| Time: 0:03:38 
-
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2loss2 Logpdf is : -3118.0639515039957
-# Sampling 100%|███████████████████████████████| Time: 0:04:12 
-# #   --------------------------
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2loss2 Logpdf is : -6242.351071278482
-# Sampling 100%|███████████████████████████████| Time: 0:05:09 
-
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2loss2 Logpdf is : -6242.351071278482
-# Sampling 100%|███████████████████████████████| Time: 0:04:47 
-
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2loss2 Logpdf is : -6242.351071278482
-# Sampling 100%|███████████████████████████████| Time: 0:04:25 
-# #   --------------
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2loss2 Logpdf is : -12497.32657780532
-# Sampling 100%|███████████████████████████████| Time: 0:06:47 
-
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2loss2 Logpdf is : -12497.32657780532
-# Sampling 100%|███████████████████████████████| Time: 0:05:54
-
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2loss2 Logpdf is : -12497.32657780532
-# Sampling 100%|███████████████████████████████| Time: 0:05:46
-# # ------------------------
-# # -----------------------
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -882.2934218498742
-# L2loss2 Logpdf is : -3118.0639515039957
-# Sampling 100%|███████████████████████████████| Time: 0:04:06
-
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -882.2934218498742
-# L2loss2 Logpdf is : -3118.0639515039957
-# Sampling 100%|███████████████████████████████| Time: 0:03:32
-
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -882.2934218498742
-# L2loss2 Logpdf is : -3118.0639515039957
-# Sampling 100%|███████████████████████████████| Time: 0:03:01 
-# # --------------------------
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -1411.1717435511828
-# L2loss2 Logpdf is : -6242.351071278482
-# Sampling 100%|███████████████████████████████| Time: 0:04:02
-
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -1411.1717435511828
-# L2loss2 Logpdf is : -6242.351071278482
-# Sampling 100%|███████████████████████████████| Time: 0:04:08
-
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -1411.1717435511828
-# L2loss2 Logpdf is : -6242.351071278482
-# Sampling 100%|███████████████████████████████| Time: 0:04:15
-# # ----------------------------
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -3240.067149411982
-# L2loss2 Logpdf is : -12497.32657780532
-# Sampling 100%|███████████████████████████████| Time: 0:05:37
-
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -3240.067149411982
-# L2loss2 Logpdf is : -12497.32657780532
-# Sampling 100%|███████████████████████████████| Time: 0:06:02
-
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -3240.067149411982
-# L2loss2 Logpdf is : -12497.32657780532
-# Sampling 100%|███████████████████████████████| Time: 0:06:13
-
-using NeuralPDE, Lux, ModelingToolkit, Optimization, OptimizationOptimJL
-import ModelingToolkit: Interval, infimum, supremum
-
-using NeuralPDE, Flux, OptimizationOptimisers
-
-function diffeq(u, p, t)
-    u1, u2 = u
-    return [u2, p[1] + p[2] * sin(u1) + p[3] * u2]
-end
-p = [5, -10, -1.7]
-u0 = [-1.0, 7.0]
-tspan = (0.0, 10.0)
-prob = ODEProblem(ODEFunction(diffeq), u0, tspan, p)
-
-chainnew = Flux.Chain(Flux.Dense(1, 8, tanh), Flux.Dense(8, 8, tanh), Flux.Dense(8, 2)) |>
-           Flux.f64
-
-opt = OptimizationOptimisers.Adam(0.1)
-opt = Optimisers.ADAGrad(0.1)
-opt = Optimisers.AdaMax(0.01)
-algnew = NeuralPDE.NNODE(chainnew, opt)
-solution_new = solve(prob, algnew, verbose = true,
-    abstol = 1e-10, maxiters = 7000)
-u = reduce(hcat, solution_new.u)
-plot(solution_new.t, u[1, :])
-plot!(solution_new.t, u[2, :])
-
-algnew = NeuralPDE.BNNODE(chainnew, draw_samples = 200,
-    n_leapfrog = 30, progress = true)
-solution_new = solve(prob, algnew)
-
-@parameters t
-@variables u1(..), u2(..)
-D = Differential(t)
-eq = [D(u1(t)) ~ u2(t),
-    D(u2(t)) ~ 5 - 10 * sin(u1(t)) - 1.7 * u2(t)];
-
-import ModelingToolkit: Interval
-bcs = [u1(0) ~ -1, u2(0) ~ 7]
-domains = [t ∈ Interval(0.0, 10.0)]
-dt = 0.01
-
-input_ = length(domains) # number of dimensions
-n = 16
-chain = [Lux.Chain(Lux.Dense(input_, n, Lux.σ), Lux.Dense(n, n, Lux.σ), Lux.Dense(n, 1))
-         for _ in 1:2]
-
-@named pde_system = PDESystem(eq, bcs, domains, [t], [u1(t), u2(t)])
-
-strategy = NeuralPDE.GridTraining(dt)
-discretization = PhysicsInformedNN(chain, strategy)
-sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization)
-
-pde_loss_functions = sym_prob.loss_functions.pde_loss_functions
-bc_loss_functions = sym_prob.loss_functions.bc_loss_functions
-
-callback = function (p, l)
-    println("loss: ", l)
-    # println("pde_losses: ", map(l_ -> l_(p), pde_loss_functions))
-    # println("bcs_losses: ", map(l_ -> l_(p), bc_loss_functions))
-    return false
-end
-
-loss_functions = [pde_loss_functions; bc_loss_functions]
-
-function loss_function(θ, p)
-    sum(map(l -> l(θ), loss_functions))
-end
-
-f_ = OptimizationFunction(loss_function, Optimization.AutoZygote())
-prob = Optimization.OptimizationProblem(f_, sym_prob.flat_init_params)
-
-res = Optimization.solve(prob,
-    OptimizationOptimJL.BFGS();
-    callback = callback,
-    maxiters = 1000)
-phi = discretization.phi
\ No newline at end of file
+param1 = sol3lux_pestim.estimated_de_params[1]
+@test abs(param1 - p) < abs(0.45 * p)
\ No newline at end of file
diff --git a/test/BPINN_newform.jl b/test/BPINN_newform.jl
new file mode 100644
index 0000000000..fa2f04073e
--- /dev/null
+++ b/test/BPINN_newform.jl
@@ -0,0 +1,4354 @@
+# # Testing Code
+using Test, MCMCChains
+using ForwardDiff, Distributions, OrdinaryDiffEq
+using Flux, OptimizationOptimisers, AdvancedHMC, Lux
+using Statistics, Random, Functors, ComponentArrays
+using NeuralPDE, MonteCarloMeasurements
+
+# note that current testing bounds can be easily further tightened but have been inflated for support for Julia build v1
+# on latest Julia version it performs much better for below tests
+Random.seed!(100)
+
+# for sampled params->lux ComponentArray
+function vector_to_parameters(ps_new::AbstractVector, ps::NamedTuple)
+    @assert length(ps_new) == Lux.parameterlength(ps)
+    i = 1
+    function get_ps(x)
+        z = reshape(view(ps_new, i:(i + length(x) - 1)), size(x))
+        i += length(x)
+        return z
+    end
+    return Functors.fmap(get_ps, ps)
+end
+
+## PROBLEM-1 (WITHOUT PARAMETER ESTIMATION)
+linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π)
+linear = (u, p, t) -> cos(2 * π * t)
+tspan = (0.0, 2.0)
+u0 = 0.0
+prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan)
+p = prob.p
+
+# Numerical and Analytical Solutions: testing ahmc_bayesian_pinn_ode()
+ta = range(tspan[1], tspan[2], length = 300)
+u = [linear_analytic(u0, nothing, ti) for ti in ta]
+x̂ = collect(Float64, Array(u) + 0.02 * randn(size(u)))
+time = vec(collect(Float64, ta))
+physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# testing points for solve() call must match saveat(1/50.0) arg
+ta0 = range(tspan[1], tspan[2], length = 101)
+u1 = [linear_analytic(u0, nothing, ti) for ti in ta0]
+x̂1 = collect(Float64, Array(u1) + 0.02 * randn(size(u1)))
+time1 = vec(collect(Float64, ta0))
+physsol0_1 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
+
+chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64
+chainlux = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1))
+init1, re1 = destructure(chainflux)
+θinit, st = Lux.setup(Random.default_rng(), chainlux)
+
+fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chainflux,
+    draw_samples = 2500,
+    n_leapfrog = 30)
+
+fh_mcmc_chain2, fhsamples2, fhstats2 = ahmc_bayesian_pinn_ode(prob, chainlux,
+    draw_samples = 2500,
+    n_leapfrog = 30)
+
+# can change training strategies by adding this to call (Quadratuer and GridTraining show good results but stochastics sampling techniques perform bad)
+# strategy = QuadratureTraining(; quadrature_alg = QuadGKJL(),
+#     reltol = 1e-6,
+#     abstol = 1e-3, maxiters = 1000,
+#     batch = 0)
+
+alg = NeuralPDE.BNNODE(chainflux, draw_samples = 2500,
+    n_leapfrog = 30)
+sol1flux = solve(prob, alg)
+
+alg = NeuralPDE.BNNODE(chainlux, draw_samples = 2500,
+    n_leapfrog = 30)
+sol1lux = solve(prob, alg)
+
+# testing points
+t = time
+# Mean of last 500 sampled parameter's curves(flux and lux chains)[Ensemble predictions]
+out = re1.(fhsamples1[(end - 500):end])
+yu = collect(out[i](t') for i in eachindex(out))
+fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)]
+meanscurve1 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean
+
+θ = [vector_to_parameters(fhsamples1[i], θinit) for i in 2000:2500]
+luxar = [chainlux(t', θ[i], st)[1] for i in 1:500]
+luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
+meanscurve2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
+
+# --------------------- ahmc_bayesian_pinn_ode() call
+@test mean(abs.(x̂ .- meanscurve1)) < 0.05
+@test mean(abs.(physsol1 .- meanscurve1)) < 0.005
+@test mean(abs.(x̂ .- meanscurve2)) < 0.05
+@test mean(abs.(physsol1 .- meanscurve2)) < 0.005
+
+#--------------------- solve() call 
+@test mean(abs.(x̂1 .- sol1flux.ensemblesol[1])) < 0.05
+@test mean(abs.(physsol0_1 .- sol1flux.ensemblesol[1])) < 0.05
+@test mean(abs.(x̂1 .- sol1lux.ensemblesol[1])) < 0.05
+@test mean(abs.(physsol0_1 .- sol1lux.ensemblesol[1])) < 0.05
+
+## PROBLEM-1 (WITH PARAMETER ESTIMATION)
+linear_analytic = (u0, p, t) -> u0 + sin(p * t) / (p)
+linear = (u, p, t) -> cos(p * t)
+tspan = (0.0, 2.0)
+u0 = 0.0
+p = 2 * pi
+prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan, p)
+
+# Numerical and Analytical Solutions
+sol1 = solve(prob, Tsit5(); saveat = 0.01)
+u = sol1.u
+time = sol1.t
+
+# BPINN AND TRAINING DATASET CREATION(dataset must be defined only inside problem timespan!)
+ta = range(tspan[1], tspan[2], length = 25)
+u = [linear_analytic(u0, p, ti) for ti in ta]
+x̂ = collect(Float64, Array(u) .+ (0.2 .* Array(u) .* randn(size(u))))
+time = vec(collect(Float64, ta))
+dataset = [x̂, time]
+physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# testing points for solve call(saveat=1/50.0 ∴ at t = collect(eltype(saveat), prob.tspan[1]:saveat:prob.tspan[2] internally estimates)
+ta0 = range(tspan[1], tspan[2], length = 101)
+u1 = [linear_analytic(u0, p, ti) for ti in ta0]
+x̂1 = collect(Float64, Array(u1) + 0.2 * randn(size(u1)))
+time1 = vec(collect(Float64, ta0))
+physsol1_1 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
+
+using Plots, StatsPlots
+# plot(dataset[2], calderivatives(dataset)')
+yu = collect(prob.tspan[1]:(1 / 50.0):prob.tspan[2])
+plot(yu, [linear_analytic(u0, p, t) for t in yu])
+chainflux1 = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64
+chainlux1 = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1))
+init1, re1 = destructure(chainflux1)
+θinit, st = Lux.setup(Random.default_rng(), chainlux1)
+
+fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chainflux1,
+    dataset = dataset,
+    draw_samples = 2500,
+    physdt = 1 / 50.0f0,
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(9,
+            0.5),
+    ],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30)
+
+fh_mcmc_chain2, fhsamples2, fhstats2 = ahmc_bayesian_pinn_ode(prob, chainlux1,
+    dataset = dataset,
+    draw_samples = 2500,
+    physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30)
+
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 10.0),
+    l2std = [0.005], phystd = [0.01],
+    param = [Normal(11, 6)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30)
+# original paper (pure data 0 1)
+sol1flux = solve(prob, alg)
+sol1flux.estimated_ode_params
+# pure data method 1 1
+sol2flux = solve(prob, alg)
+sol2flux.estimated_ode_params
+# pure data method 1 0
+sol3flux = solve(prob, alg)
+sol3flux.estimated_ode_params
+# deri collocation
+sol4flux = solve(prob, alg)
+sol4flux.estimated_ode_params
+# collocation
+sol5flux = solve(prob, alg)
+sol5flux.estimated_ode_params
+# collocation + L2Data loss(at 9,0.5 1,2 gives same)
+sol6flux = solve(prob, alg)
+sol6flux.estimated_ode_params
+# 2500 iters
+sol7flux = solve(prob, alg)
+sol7flux.estimated_ode_params
+
+plotly()
+plot!(yu, sol1flux.ensemblesol[1])
+plot!(yu, sol2flux.ensemblesol[1])
+plot!(yu, sol3flux.ensemblesol[1])
+plot!(yu, sol4flux.ensemblesol[1])
+plot!(yu, sol5flux.ensemblesol[1])
+plot!(yu, sol6flux.ensemblesol[1])
+
+plot!(dataset[2], dataset[1])
+
+# plot!(sol4flux.ensemblesol[1])
+# plot!(sol5flux.ensemblesol[1])
+
+sol2flux.estimated_ode_params
+
+sol1flux.estimated_ode_params
+
+sol3flux.estimated_ode_params
+
+sol4flux.estimated_ode_params
+
+sol5flux.estimated_ode_params
+
+alg = NeuralPDE.BNNODE(chainlux1, dataset = dataset,
+    draw_samples = 2500,
+    physdt = 1 / 50.0f0,
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(9,
+            0.5),
+    ],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30)
+
+sol2lux = solve(prob, alg)
+
+# testing points
+t = time
+# Mean of last 500 sampled parameter's curves(flux and lux chains)[Ensemble predictions]
+out = re1.([fhsamples1[i][1:22] for i in 2000:2500])
+yu = collect(out[i](t') for i in eachindex(out))
+fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)]
+meanscurve1 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean
+
+θ = [vector_to_parameters(fhsamples2[i][1:(end - 1)], θinit) for i in 2000:2500]
+luxar = [chainlux1(t', θ[i], st)[1] for i in 1:500]
+luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
+meanscurve2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
+
+# --------------------- ahmc_bayesian_pinn_ode() call  
+@test mean(abs.(physsol1 .- meanscurve1)) < 0.15
+@test mean(abs.(physsol1 .- meanscurve2)) < 0.15
+
+# ESTIMATED ODE PARAMETERS (NN1 AND NN2)
+@test abs(p - mean([fhsamples2[i][23] for i in 2000:2500])) < abs(0.25 * p)
+@test abs(p - mean([fhsamples1[i][23] for i in 2000:2500])) < abs(0.25 * p)
+
+#-------------------------- solve() call  
+@test mean(abs.(physsol1_1 .- sol2flux.ensemblesol[1])) < 8e-2
+@test mean(abs.(physsol1_1 .- sol2lux.ensemblesol[1])) < 8e-2
+
+# ESTIMATED ODE PARAMETERS (NN1 AND NN2)
+@test abs(p - sol1flux.estimated_ode_params[1]) < abs(0.15 * p)
+@test abs(p - sol2lux.estimated_ode_params[1]) < abs(0.15 * p)
+
+## PROBLEM-2
+linear = (u, p, t) -> u / p + exp(t / p) * cos(t)
+tspan = (0.0, 10.0)
+u0 = 0.0
+p = -5.0
+prob = ODEProblem(linear, u0, tspan, p)
+linear_analytic = (u0, p, t) -> exp(t / p) * (u0 + sin(t))
+
+# SOLUTION AND CREATE DATASET
+sol = solve(prob, Tsit5(); saveat = 0.1)
+u = sol.u
+time = sol.t
+x̂ = u .+ (u .* 0.2) .* randn(size(u))
+dataset = [x̂, time]
+t = sol.t
+physsol1 = [linear_analytic(prob.u0, p, t[i]) for i in eachindex(t)]
+
+ta0 = range(tspan[1], tspan[2], length = 501)
+u1 = [linear_analytic(u0, p, ti) for ti in ta0]
+time1 = vec(collect(Float64, ta0))
+physsol2 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
+
+chainflux12 = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 6, tanh),
+    Flux.Dense(6, 1)) |> Flux.f64
+chainlux12 = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh), Lux.Dense(6, 1))
+init1, re1 = destructure(chainflux12)
+θinit, st = Lux.setup(Random.default_rng(), chainlux12)
+
+using Flux
+using Random
+
+function derivatives(chainflux, dataset)
+    loss(x, y) = Flux.mse(chainflux(x), y)
+    optimizer = Flux.Optimise.ADAM(0.01)
+    epochs = 2500
+    for epoch in 1:epochs
+        Flux.train!(loss, Flux.params(chainflux), [(dataset[2]', dataset[1]')], optimizer)
+    end
+    getgradient(chainflux, dataset)
+end
+
+function getgradient(chainflux, dataset)
+    return (chainflux(dataset[end]' .+ sqrt(eps(eltype(Float64)))) .-
+            chainflux(dataset[end]')) ./
+           sqrt(eps(eltype(dataset[end][1])))
+end
+
+ans = derivatives(chainflux12, dataset)
+
+init3, re = destructure(chainflux12)
+init2 == init1
+init3 == init2
+plot!(dataset[end], ans')
+plot!(dataset[end], chainflux12(dataset[end]')')
+
+ars = getgradient(chainflux12, dataset)
+
+plot!(dataset[end], ars')
+
+fh_mcmc_chainflux12, fhsamplesflux12, fhstatsflux12 = ahmc_bayesian_pinn_ode(prob,
+    chainflux12,
+    draw_samples = 1500,
+    l2std = [0.03],
+    phystd = [
+        0.03],
+    priorsNNw = (0.0,
+        10.0),
+    n_leapfrog = 30)
+
+fh_mcmc_chainflux22, fhsamplesflux22, fhstatsflux22 = ahmc_bayesian_pinn_ode(prob,
+    chainflux12,
+    dataset = dataset,
+    draw_samples = 1500,
+    l2std = [0.03],
+    phystd = [
+        0.03,
+    ],
+    priorsNNw = (0.0,
+        10.0),
+    param = [
+        Normal(-7,
+            4),
+    ],
+    n_leapfrog = 30)
+
+fh_mcmc_chainlux12, fhsampleslux12, fhstatslux12 = ahmc_bayesian_pinn_ode(prob, chainlux12,
+    draw_samples = 1500,
+    l2std = [0.03],
+    phystd = [0.03],
+    priorsNNw = (0.0,
+        10.0),
+    n_leapfrog = 30)
+
+fh_mcmc_chainlux22, fhsampleslux22, fhstatslux22 = ahmc_bayesian_pinn_ode(prob, chainlux12,
+    dataset = dataset,
+    draw_samples = 1500,
+    l2std = [0.03],
+    phystd = [0.03],
+    priorsNNw = (0.0,
+        10.0),
+    param = [
+        Normal(-7,
+            4),
+    ],
+    n_leapfrog = 30)
+
+alg1 = NeuralPDE.BNNODE(chainflux12,
+    dataset = dataset,
+    draw_samples = 500,
+    l2std = [0.01],
+    phystd = [
+        0.03,
+    ],
+    priorsNNw = (0.0,
+        10.0),
+    param = [
+        Normal(-7,
+            4),
+    ],
+    n_leapfrog = 30, progress = true)
+
+# original paper (pure data 0 1)
+sol1flux_pestim = solve(prob, alg1)
+sol1flux_pestim.estimated_ode_params
+# pure data method 1 1
+sol2flux_pestim = solve(prob, alg1)
+sol2flux_pestim.estimated_ode_params
+# pure data method 1 0
+sol3flux_pestim = solve(prob, alg1)
+sol3flux_pestim.estimated_ode_params
+# deri collocation
+sol4flux_pestim = solve(prob, alg1)
+sol4flux_pestim.estimated_ode_params
+# collocation
+sol5flux_pestim = solve(prob, alg1)
+sol5flux_pestim.estimated_ode_params
+# collocation + L2Data loss(at 9,0.5 1,2 gives same)
+sol6flux_pestim = solve(prob, alg1)
+sol6flux_pestim.estimated_ode_params
+
+using Plots, StatsPlots
+ars = collect(prob.tspan[1]:(1 / 50.0):prob.tspan[2])
+plot(time, u)
+plot!(ars, sol1flux_pestim.ensemblesol[1])
+plot!(ars, sol2flux_pestim.ensemblesol[1])
+plot!(ars, sol3flux_pestim.ensemblesol[1])
+plot!(ars, sol4flux_pestim.ensemblesol[1])
+plot!(ars, sol5flux_pestim.ensemblesol[1])
+plot!(ars, sol6flux_pestim.ensemblesol[1])
+
+sol3flux_pestim.estimated_ode_params
+
+sol4flux_pestim.estimated_ode_params
+
+sol5flux_pestim.estimated_ode_params
+
+sol6flux_pestim.estimated_ode_params
+
+ars = collect(prob.tspan[1]:(1 / 50.0):prob.tspan[2])
+
+init, re1 = destructure(chainflux12)
+init
+init1
+alg = NeuralPDE.BNNODE(chainlux12,
+    dataset = dataset,
+    draw_samples = 1500,
+    l2std = [0.03],
+    phystd = [0.03],
+    priorsNNw = (0.0,
+        10.0),
+    param = [
+        Normal(-7,
+            4),
+    ],
+    n_leapfrog = 30)
+
+sol3lux_pestim = solve(prob, alg)
+
+# testing timepoints
+t = sol.t
+#------------------------------ ahmc_bayesian_pinn_ode() call 
+# Mean of last 500 sampled parameter's curves(flux chains)[Ensemble predictions]
+out = re1.([fhsamplesflux12[i][1:61] for i in 1000:1500])
+yu = [out[i](t') for i in eachindex(out)]
+fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)]
+meanscurve1_1 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean
+
+out = re1.([fhsamplesflux22[i][1:61] for i in 1000:1500])
+yu = [out[i](t') for i in eachindex(out)]
+fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)]
+meanscurve1_2 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean
+
+@test mean(abs.(sol.u .- meanscurve1_1)) < 1e-2
+@test mean(abs.(physsol1 .- meanscurve1_1)) < 1e-2
+@test mean(abs.(sol.u .- meanscurve1_2)) < 5e-2
+@test mean(abs.(physsol1 .- meanscurve1_2)) < 5e-2
+
+# estimated parameters(flux chain)
+param1 = mean(i[62] for i in fhsamplesflux22[1000:1500])
+@test abs(param1 - p) < abs(0.3 * p)
+
+# Mean of last 500 sampled parameter's curves(lux chains)[Ensemble predictions]
+θ = [vector_to_parameters(fhsampleslux12[i], θinit) for i in 1000:1500]
+luxar = [chainlux12(t', θ[i], st)[1] for i in 1:500]
+luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
+meanscurve2_1 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
+
+θ = [vector_to_parameters(fhsampleslux22[i][1:(end - 1)], θinit) for i in 1000:1500]
+luxar = [chainlux12(t', θ[i], st)[1] for i in 1:500]
+luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
+meanscurve2_2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
+
+@test mean(abs.(sol.u .- meanscurve2_1)) < 1e-1
+@test mean(abs.(physsol1 .- meanscurve2_1)) < 1e-1
+@test mean(abs.(sol.u .- meanscurve2_2)) < 5e-2
+@test mean(abs.(physsol1 .- meanscurve2_2)) < 5e-2
+
+# estimated parameters(lux chain)
+param1 = mean(i[62] for i in fhsampleslux22[1000:1500])
+@test abs(param1 - p) < abs(0.3 * p)
+
+#-------------------------- solve() call 
+# (flux chain)
+@test mean(abs.(physsol2 .- sol3flux_pestim.ensemblesol[1])) < 0.15
+# estimated parameters(flux chain)
+param1 = sol3flux_pestim.estimated_ode_params[1]
+@test abs(param1 - p) < abs(0.45 * p)
+
+# (lux chain)
+@test mean(abs.(physsol2 .- sol3lux_pestim.ensemblesol[1])) < 0.15
+# estimated parameters(lux chain)
+param1 = sol3lux_pestim.estimated_ode_params[1]
+@test abs(param1 - p) < abs(0.45 * p)
+
+using Plots, StatsPlots
+using NoiseRobustDifferentiation, Weave, DataInterpolations
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood
+# # 25 points 
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+#     draw_samples = 1500, physdt = 1 / 50.0f0, phystd = [0.01],
+#     l2std = [0.01],
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux1 = solve(prob, alg)
+# sol2flux1.estimated_ode_params[1] #6.41722 Particles{Float64, 1}, 6.02404 Particles{Float64, 1}
+# sol2flux2 = solve(prob, alg)
+# sol2flux2.estimated_ode_params[1] #6.42782 Particles{Float64, 1}, 6.07509 Particles{Float64, 1}
+# sol2flux3 = solve(prob, alg)
+# sol2flux3.estimated_ode_params[1] #6.42782 Particles{Float64, 1}, 6.00825 Particles{Float64, 1}
+
+# # 50 points 
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux11 = solve(prob, alg)
+# sol2flux11.estimated_ode_params[1] #5.71268 Particles{Float64, 1}, 6.07242 Particles{Float64, 1}
+# sol2flux22 = solve(prob, alg)
+# sol2flux22.estimated_ode_params[1] #5.74599 Particles{Float64, 1}, 6.04837 Particles{Float64, 1}
+# sol2flux33 = solve(prob, alg)
+# sol2flux33.estimated_ode_params[1] #5.74599 Particles{Float64, 1}, 6.02838 Particles{Float64, 1}
+
+# # 100 points 
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux111 = solve(prob, alg)
+# sol2flux111.estimated_ode_params[1] #6.59097 Particles{Float64, 1}, 5.89384 Particles{Float64, 1}
+# sol2flux222 = solve(prob, alg)
+# sol2flux222.estimated_ode_params[1] #6.62813 Particles{Float64, 1}, 5.88216 Particles{Float64, 1}
+# sol2flux333 = solve(prob, alg)
+# sol2flux333.estimated_ode_params[1] #6.62813 Particles{Float64, 1}, 5.85327 Particles{Float64, 1}
+
+# # ----------------------------------------------------------
+# # physdt=1/20, full likelihood cdm
+# # 25 points 
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux1_cdm = solve(prob, alg)
+# sol2flux1_cdm.estimated_ode_params[1]# 6.50506 Particles{Float64, 1} ,6.38963 Particles{Float64, 1}
+# sol2flux2_cdm = solve(prob, alg)
+# sol2flux2_cdm.estimated_ode_params[1] #6.50032 Particles{Float64, 1} ,6.39817 Particles{Float64, 1}
+# sol2flux3_cdm = solve(prob, alg)
+# sol2flux3_cdm.estimated_ode_params[1] #6.50032 Particles{Float64, 1} ,6.36296 Particles{Float64, 1}
+
+# # 50 points 
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux11_cdm = solve(prob, alg)
+# sol2flux11_cdm.estimated_ode_params[1] #6.52951 Particles{Float64, 1},5.15621 Particles{Float64, 1}
+# sol2flux22_cdm = solve(prob, alg)
+# sol2flux22_cdm.estimated_ode_params[1] #6.54988 Particles{Float64, 1},5.16363 Particles{Float64, 1}
+# sol2flux33_cdm = solve(prob, alg)
+# sol2flux33_cdm.estimated_ode_params[1] #6.54988 Particles{Float64, 1},5.15591 Particles{Float64, 1}
+
+# # 100 points 
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux111_cdm = solve(prob, alg)
+# sol2flux111_cdm.estimated_ode_params[1] #6.74338 Particles{Float64, 1}, 9.72422 Particles{Float64, 1}
+# sol2flux222_cdm = solve(prob, alg)
+# sol2flux222_cdm.estimated_ode_params[1] #6.72642 Particles{Float64, 1}, 9.71991 Particles{Float64, 1}
+# sol2flux333_cdm = solve(prob, alg)
+# sol2flux333_cdm.estimated_ode_params[1] #6.72642 Particles{Float64, 1}, 9.75045 Particles{Float64, 1}
+
+# --------------------------------------------------------------------------------------
+#                              NEW SERIES OF TESTS (IN ORDER OF EXECUTION)
+#  -------------------------------------------------------------------------------------
+# original paper implementaion
+# 25 points
+ta = range(tspan[1], tspan[2], length = 25)
+u = [linear_analytic(u0, p, ti) for ti in ta]
+x̂ = collect(Float64, u .+ 0.05 * randn(size(u)))
+time = vec(collect(Float64, ta))
+dataset1 = [x̂, time]
+physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+# scatter!(time, u)
+# dataset
+# scatter!(dataset1[2], dataset1[1])
+# plot(time, physsol1)
+
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux1_normal = solve(prob, alg)
+sol2flux1_normal.estimated_ode_params[1]  #7.70593 Particles{Float64, 1}, 6.36096 Particles{Float64, 1} | 6.45865 Particles{Float64, 1}
+sol2flux2_normal = solve(prob, alg)
+sol2flux2_normal.estimated_ode_params[1] #6.66347 Particles{Float64, 1}, 6.36974 Particles{Float64, 1} | 6.45865 Particles{Float64, 1}
+sol2flux3_normal = solve(prob, alg)
+sol2flux3_normal.estimated_ode_params[1] #6.84827 Particles{Float64, 1}, 6.29555 Particles{Float64, 1} | 6.39947 Particles{Float64, 1}
+
+# 50 points
+ta = range(tspan[1], tspan[2], length = 50)
+u = [linear_analytic(u0, p, ti) for ti in ta]
+x̂ = collect(Float64, Array(u) + 0.05 * randn(size(u)))
+time = vec(collect(Float64, ta))
+dataset2 = [x̂, time]
+physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux11_normal = solve(prob, alg)
+sol2flux11_normal.estimated_ode_params[1] #7.83577 Particles{Float64, 1},6.24652 Particles{Float64, 1} | 6.34495 Particles{Float64, 1}
+sol2flux22_normal = solve(prob, alg)
+sol2flux22_normal.estimated_ode_params[1] #6.49477 Particles{Float64, 1},6.2118 Particles{Float64, 1} | 6.32476 Particles{Float64, 1}
+sol2flux33_normal = solve(prob, alg)
+sol2flux33_normal.estimated_ode_params[1] #6.47421 Particles{Float64, 1},6.33687 Particles{Float64, 1} | 6.2448 Particles{Float64, 1}
+
+# 100 points
+ta = range(tspan[1], tspan[2], length = 100)
+u = [linear_analytic(u0, p, ti) for ti in ta]
+x̂ = collect(Float64, Array(u) + 0.05 * randn(size(u)))
+time = vec(collect(Float64, ta))
+dataset3 = [x̂, time]
+physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux111_normal = solve(prob, alg)
+sol2flux111_normal.estimated_ode_params[1] #5.96604 Particles{Float64, 1},5.99588 Particles{Float64, 1} | 6.19805 Particles{Float64, 1}
+sol2flux222_normal = solve(prob, alg)
+sol2flux222_normal.estimated_ode_params[1] #6.05432 Particles{Float64, 1},6.0768 Particles{Float64, 1} | 6.22948 Particles{Float64, 1}
+sol2flux333_normal = solve(prob, alg)
+sol2flux333_normal.estimated_ode_params[1] #6.08856 Particles{Float64, 1},5.94819 Particles{Float64, 1} | 6.2551 Particles{Float64, 1}
+
+# LOTKA VOLTERRA CASE 
+function lotka_volterra(u, p, t)
+    # Model parameters.
+    α, β, γ, δ = p
+    # Current state.
+    x, y = u
+
+    # Evaluate differential equations.
+    dx = (α - β * y) * x # prey
+    dy = (δ * x - γ) * y # predator
+
+    return [dx, dy]
+end
+
+# initial-value problem.
+u01 = [1.0, 1.0]
+p1 = [1.5, 1.0, 3.0, 1.0]
+tspan1 = (0.0, 6.0)
+prob1 = ODEProblem(lotka_volterra, u01, tspan1, p1)
+
+# chainlux = Lux.Chain(Lux.Dense(1, 7, Lux.tanh), Lux.Dense(7, 7, Lux.tanh), Lux.Dense(7, 2))
+chainflux1 = Flux.Chain(Flux.Dense(1, 8, tanh), Flux.Dense(8, 8, tanh), Flux.Dense(8, 2))
+
+#testing timepoints must match keyword arg `saveat`` timepoints of solve() call
+t1 = collect(Float64, prob1.tspan[1]:(1 / 50.0):prob1.tspan[2])
+
+# --------------------------------------------------------------------------
+# original paper implementaion lotka volterra
+# 31 points  
+solution1 = solve(prob1, Tsit5(); saveat = 0.1)
+time1 = solution1.t
+physsol1_1 = solution1.u
+u1 = hcat(solution1.u...)
+x1 = u1[1, :] .+ 0.3 .* u1[1, :] .* randn(length(u1[1, :]))
+y1 = u1[2, :] .+ 0.3 .* u1[2, :] .* randn(length(u1[2, :]))
+dataset2_1 = [x1, y1, time1]
+plot(dataset2_1[end], dataset2_1[1])
+plot!(dataset2_1[end], dataset2_1[2])
+plot!(time1, u1[1, :])
+plot!(time1, u1[2, :])
+
+alg1 = NeuralPDE.BNNODE(chainflux1,
+    dataset = dataset2_1,
+    draw_samples = 1000,
+    physdt = 1 / 20.0,
+    l2std = [
+        0.2,
+        0.2,
+    ],
+    phystd = [
+        0.5,
+        0.5,
+    ],
+    priorsNNw = (0.0,
+        10.0),
+    param = [
+        Normal(4,
+            3),
+        Normal(-2,
+            4),
+        Normal(0,
+            5),
+        Normal(2.5,
+            2)],
+    n_leapfrog = 30, progress = true)
+
+# original paper (pure data 0 1)
+sol1flux1_lotka = solve(prob1, alg1)
+sol1flux1_lotka.estimated_ode_params
+# pure data method 1 1
+sol2flux1_lotka = solve(prob1, alg1)
+sol2flux1_lotka.estimated_ode_params
+# pure data method 1 0
+sol3flux1_lotka = solve(prob1, alg1)
+sol3flux1_lotka.estimated_ode_params
+# deri collocation
+sol4flux1_lotka = solve(prob1, alg1)
+sol4flux1_lotka.estimated_ode_params
+# collocation
+sol5flux1_lotka = solve(prob1, alg1)
+sol5flux1_lotka.estimated_ode_params
+# collocation + L2Data loss(at 9,0.5 1,2 gives same)
+sol6flux1_lotka = solve(prob1, alg1)
+sol6flux1_lotka.estimated_ode_params
+
+sol7flux1_lotka = solve(prob1, alg1)
+sol7flux1_lotka.estimated_ode_params
+
+using Plots, StatsPlots
+plot(dataset2_1[3], u1[1, :])
+plot!(dataset2_1[3], u1[2, :])
+plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol5flux1_normal.ensemblesol[2])
+plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]),
+    sol1flux1_normal.ensemblesol[1],
+    legend = :outerbottomleft)
+sol1flux2_normal = solve(prob1, alg1)
+sol1flux2_normal.estimated_ode_params  #|
+sol1flux3_normal = solve(prob1, alg1)
+sol1flux3_normal.estimated_ode_params  #|
+sol1flux4_normal = solve(prob1, alg1)
+sol1flux4_normal.estimated_ode_params
+
+plotly()
+plot!(title = "yuh")
+plot!(dataset2_1[3], dataset2_1[1])
+plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol1flux1_normal.ensemblesol[1])
+plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol1flux2_normal.ensemblesol[1])
+plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol1flux3_normal.ensemblesol[2])
+plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol1flux4_normal.ensemblesol[1])
+plot(time1, u1[1, :])
+plot!(time1, u1[2, :])
+
+ars = chainflux1(dataset2_1[end]')
+plot(ars[1, :])
+plot!(ars[2, :])
+
+function calculate_derivatives(dataset)
+    u = dataset[1]
+    u1 = dataset[2]
+    t = dataset[end]
+    # control points
+    n = Int(floor(length(t) / 10))
+    # spline for datasetvalues(solution) 
+    # interp = BSplineApprox(u, t, 4, 10, :Uniform, :Uniform)
+    interp = CubicSpline(u, t)
+    interp1 = CubicSpline(u1, t)
+    # derrivatives interpolation
+    dx = t[2] - t[1]
+    time = collect(t[1]:dx:t[end])
+    smoothu = [interp(i) for i in time]
+    smoothu1 = [interp1(i) for i in time]
+    # derivative of the spline (must match function derivative) 
+    û = tvdiff(smoothu, 20, 0.5, dx = dx, ε = 1)
+    û1 = tvdiff(smoothu1, 20, 0.5, dx = dx, ε = 1)
+    # tvdiff(smoothu, 100, 0.035, dx = dx, ε = 1)
+    # FDM
+    # û1 = diff(u) / dx
+    # dataset[1] and smoothu are almost equal(rounding errors)
+    return û, û1
+    # return 1
+end
+
+ar = calculate_derivatives(dataset2_1)
+plot(ar[1])
+plot!(ar[2])
+
+# 61 points
+solution1 = solve(prob1, Tsit5(); saveat = 0.1)
+time1 = solution1.t
+physsol1_1 = solution1.u
+u1 = hcat(solution1.u...)
+x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+dataset2_2 = [x1, y1, time1]
+
+alg1 = NeuralPDE.BNNODE(chainlux,
+    dataset = dataset2_2,
+    draw_samples = 1000,
+    l2std = [
+        0.1,
+        0.1,
+    ],
+    phystd = [
+        0.1,
+        0.1,
+    ],
+    priorsNNw = (0.0,
+        5.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol1flux11_normal = solve(prob1, alg1)
+sol1flux11_normal.estimated_ode_params #|
+sol1flux22_normal = solve(prob1, alg1)
+sol1flux22_normal.estimated_ode_params #|
+sol1flux33_normal = solve(prob1, alg1)
+sol1flux33_normal.estimated_ode_params #|
+
+# 121 points
+solution1 = solve(prob1, Tsit5(); saveat = 0.05)
+time1 = solution1.t
+physsol1_1 = solution1.u
+u1 = hcat(solution1.u...)
+x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+dataset2_3 = [x1, y1, time1]
+
+alg1 = NeuralPDE.BNNODE(chainlux,
+    dataset = dataset2_3,
+    draw_samples = 1000,
+    l2std = [
+        0.1,
+        0.1,
+    ],
+    phystd = [
+        0.1,
+        0.1,
+    ],
+    priorsNNw = (0.0,
+        5.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol1flux111_normal = solve(prob1, alg1)
+sol1flux111_normal.estimated_ode_params #|
+sol1flux222_normal = solve(prob1, alg1)
+sol1flux222_normal.estimated_ode_params #|
+sol1flux333_normal = solve(prob1, alg1)
+sol1flux333_normal.estimated_ode_params #| 
+
+# -------------------------------------------------------------------- 
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -640.4155412187399
+# Sampling 100%|███████████████████████████████| Time: 0:02:30
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -640.4155412187399
+# Sampling 100%|███████████████████████████████| Time: 0:01:54
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -640.4155412187399
+# Sampling 100%|███████████████████████████████| Time: 0:01:59
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1198.9147562830894
+# Sampling 100%|███████████████████████████████| Time: 0:02:44
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1198.9147562830894
+# Sampling 100%|███████████████████████████████| Time: 0:02:41
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1198.9147562830894
+# Sampling 100%|███████████████████████████████| Time: 0:02:41
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -2473.741390504424
+# Sampling 100%|███████████████████████████████| Time: 0:03:52
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -2473.741390504424
+# Sampling 100%|███████████████████████████████| Time: 0:03:49
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -2473.741390504424
+# Sampling 100%|███████████████████████████████| Time: 0:03:50
+
+# # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
+# physics Logpdf is : -6.659143464386241e7
+# prior Logpdf is : -150.30074579848434
+# L2lossData Logpdf is : -6.03075717462954e6
+# Sampling 100%|███████████████████████████████| Time: 0:04:54
+
+# physics Logpdf is : -8.70012053004202e8
+# prior Logpdf is : -150.3750892952511
+# L2lossData Logpdf is : -6.967914805207133e6
+# Sampling 100%|███████████████████████████████| Time: 0:05:09
+
+# physics Logpdf is : -5.417241281343099e7
+# prior Logpdf is : -150.52079555737976
+# L2lossData Logpdf is : -4.195953436792884e6
+# Sampling 100%|███████████████████████████████| Time: 0:05:01
+
+# physics Logpdf is : -4.579552981943833e8
+# prior Logpdf is : -150.30491731974283
+# L2lossData Logpdf is : -8.595475827260146e6
+# Sampling 100%|███████████████████████████████| Time: 0:06:08
+
+# physics Logpdf is : -1.989281834955769e7
+# prior Logpdf is : -150.16009042727543
+# L2lossData Logpdf is : -1.121270659669029e7
+# Sampling 100%|███████████████████████████████| Time: 0:05:38
+
+# physics Logpdf is : -8.683829147264534e8
+# prior Logpdf is : -150.37824872259102
+# L2lossData Logpdf is : -1.0887662888035845e7
+# Sampling 100%|███████████████████████████████| Time: 0:05:50
+
+# physics Logpdf is : -3.1944760610332566e8
+# prior Logpdf is : -150.33610348737565
+# L2lossData Logpdf is : -1.215458786744478e7
+# Sampling 100%|███████████████████████████████| Time: 0:10:50
+
+# physics Logpdf is : -3.2884572300341567e6
+# prior Logpdf is : -150.21002268156343
+# L2lossData Logpdf is : -1.102536731511176e7
+# Sampling 100%|███████████████████████████████| Time: 0:09:53
+
+# physics Logpdf is : -5.31293521002414e8
+# prior Logpdf is : -150.20948536040126
+# L2lossData Logpdf is : -1.818717239584132e7
+# Sampling 100%|███████████████████████████████| Time: 0:08:53
+
+# ----------------------------------------------------------
+# Full likelihood no l2 only new L22(NN gradients)
+# 25 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux1_new = solve(prob, alg)
+sol2flux1_new.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} | 6.21662 Particles{Float64, 1}
+sol2flux2_new = solve(prob, alg)
+sol2flux2_new.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1} | 7.14238 Particles{Float64, 1}
+sol2flux3_new = solve(prob, alg)
+sol2flux3_new.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1} | 6.79159 Particles{Float64, 1}
+
+# 50 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux11_new = solve(prob, alg)
+sol2flux11_new.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1} | 5.33467 Particles{Float64, 1}
+sol2flux22_new = solve(prob, alg)
+sol2flux22_new.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1} | 6.52419 Particles{Float64, 1}
+sol2flux33_new = solve(prob, alg)
+sol2flux33_new.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} | 5.36921 Particles{Float64, 1}
+
+# 100 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux111_new = solve(prob, alg)
+sol2flux111_new.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1} | 6.45333 Particles{Float64, 1}
+sol2flux222_new = solve(prob, alg)
+sol2flux222_new.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1} | 4.64417 Particles{Float64, 1}
+sol2flux333_new = solve(prob, alg)
+sol2flux333_new.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1} | 5.88037 Particles{Float64, 1}
+# ---------------------------------------------------------------------------
+
+# ----------------------------------------------------------
+# Full likelihood  l2 + new L22(NN gradients)
+# 25 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux1_new_all = solve(prob, alg)
+sol2flux1_new_all.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} | 6.4358 Particles{Float64, 1}
+sol2flux2_new_all = solve(prob, alg)
+sol2flux2_new_all.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1} | 6.52449 Particles{Float64, 1}
+sol2flux3_new_all = solve(prob, alg)
+sol2flux3_new_all.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1} | 6.34188 Particles{Float64, 1}
+
+# 50 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux11_new_all = solve(prob, alg)
+sol2flux11_new_all.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1} | 6.37889 Particles{Float64, 1}
+sol2flux22_new_all = solve(prob, alg)
+sol2flux22_new_all.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1} | 6.34747 Particles{Float64, 1}
+sol2flux33_new_all = solve(prob, alg)
+sol2flux33_new_all.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} | 6.39699 Particles{Float64, 1}
+
+# 100 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux111_new_all = solve(prob, alg)
+sol2flux111_new_all.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1} | 6.24327 Particles{Float64, 1}
+sol2flux222_new_all = solve(prob, alg)
+sol2flux222_new_all.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1} | 6.23928 Particles{Float64, 1}
+sol2flux333_new_all = solve(prob, alg)
+sol2flux333_new_all.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1} | 6.2145 Particles{Float64, 1}
+
+# ---------------------------------------------------------------------------
+# Full likelihood  l2 + new L22(dataset gradients) lotka volterra
+# 36 points 
+alg1 = NeuralPDE.BNNODE(chainlux,
+    dataset = dataset2_1,
+    draw_samples = 1000,
+    l2std = [
+        0.01,
+        0.01,
+    ],
+    phystd = [
+        0.01,
+        0.01,
+    ],
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol1flux1_new_all = solve(prob1, alg1)
+sol1flux1_new_all.estimated_ode_params[1]  #|
+sol1flux2_new_all = solve(prob1, alg1)
+sol1flux2_new_all.estimated_ode_params[1] #|
+sol1flux3_new_all = solve(prob1, alg1)
+sol1flux3_new_all.estimated_ode_params[1] #|
+
+# 61 points 
+alg1 = NeuralPDE.BNNODE(chainlux,
+    dataset = dataset2_2,
+    draw_samples = 1000,
+    l2std = [
+        0.01,
+        0.01,
+    ],
+    phystd = [
+        0.01,
+        0.01,
+    ],
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol1flux11_new_all = solve(prob1, alg1)
+sol1flux11_new_all.estimated_ode_params[1] #|
+sol1flux22_new_all = solve(prob1, alg1)
+sol1flux22_new_all.estimated_ode_params[1] #|
+sol1flux33_new_all = solve(prob1, alg1)
+sol1flux33_new_all.estimated_ode_params[1] #|
+
+# 121 points 
+alg1 = NeuralPDE.BNNODE(chainlux,
+    dataset = dataset2_3,
+    draw_samples = 1000,
+    l2std = [
+        0.01,
+        0.01,
+    ],
+    phystd = [
+        0.01,
+        0.01,
+    ],
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol1flux111_new_all = solve(prob1, alg1)
+sol1flux111_new_all.estimated_ode_params[1] #|
+sol1flux222_new_all = solve(prob1, alg1)
+sol1flux222_new_all.estimated_ode_params[1] #|
+sol1flux333_new_all = solve(prob1, alg1)
+sol1flux333_new_all.estimated_ode_params[1] #|
+# -------------------------------------------------------------------- 
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -640.4155412187399
+# L2loss2 Logpdf is : -757.9047847584478
+# Sampling 100%|███████████████████████████████| Time: 0:02:32
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -640.4155412187399
+# L2loss2 Logpdf is : -757.9047847584478
+# Sampling 100%|███████████████████████████████| Time: 0:02:19
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -640.4155412187399
+# L2loss2 Logpdf is : -757.9047847584478
+# Sampling 100%|███████████████████████████████| Time: 0:02:31
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1198.9147562830894
+# L2loss2 Logpdf is : -1517.3653615845183
+# Sampling 100%|███████████████████████████████| Time: 0:03:45
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1198.9147562830894
+# L2loss2 Logpdf is : -1517.3653615845183
+# Sampling 100%|███████████████████████████████| Time: 0:03:20
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1198.9147562830894
+# L2loss2 Logpdf is : -1517.3653615845183
+# Sampling 100%|███████████████████████████████| Time: 0:03:20
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -2473.741390504424
+# L2loss2 Logpdf is : -3037.8868319811254
+# Sampling 100%|███████████████████████████████| Time: 0:04:57
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -2473.741390504424
+# L2loss2 Logpdf is : -3037.8868319811254
+# Sampling 100%|███████████████████████████████| Time: 0:05:26
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -2473.741390504424
+# L2loss2 Logpdf is : -3037.8868319811254
+# Sampling 100%|███████████████████████████████| Time: 0:05:01
+
+# ----------------------------------------------------------
+# Full likelihood  l2 + new L22(dataset gradients)
+# 25 points
+# 1*,2*,  
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux1_newdata_all = solve(prob, alg)
+sol2flux1_newdata_all.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} | 5.73072 Particles{Float64, 1}
+sol2flux2_newdata_all = solve(prob, alg)
+sol2flux2_newdata_all.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1} | 5.71597 Particles{Float64, 1}
+sol2flux3_newdata_all = solve(prob, alg)
+sol2flux3_newdata_all.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1} | 5.7313 Particles{Float64, 1}
+
+# 50 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux11_newdata_all = solve(prob, alg)
+sol2flux11_newdata_all.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1} | 6.07153 Particles{Float64, 1}
+sol2flux22_newdata_all = solve(prob, alg)
+sol2flux22_newdata_all.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1} | 6.06623 Particles{Float64, 1}
+sol2flux33_newdata_all = solve(prob, alg)
+sol2flux33_newdata_all.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} | 6.12748 Particles{Float64, 1}
+
+# 100 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux111_newdata_all = solve(prob, alg)
+sol2flux111_newdata_all.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1} | 6.26222 Particles{Float64, 1}
+sol2flux222_newdata_all = solve(prob, alg)
+sol2flux222_newdata_all.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1} | 5.86494 Particles{Float64, 1}
+sol2flux333_newdata_all = solve(prob, alg)
+sol2flux333_newdata_all.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1} |  
+
+# ---------------------------------------------------------------------------
+
+# LOTKA VOLTERRA CASE
+using Plots, StatsPlots
+function lotka_volterra(u, p, t)
+    # Model parameters.
+    α, β, γ, δ = p
+    # Current state.
+    x, y = u
+
+    # Evaluate differential equations.
+    dx = (α - β * y) * x # prey
+    dy = (δ * x - γ) * y # predator
+
+    return [dx, dy]
+end
+
+# initial-value problem.
+u01 = [1.0, 1.0]
+p1 = [1.5, 1.0, 3.0, 1.0]
+tspan1 = (0.0, 6.0)
+prob1 = ODEProblem(lotka_volterra, u01, tspan1, p1)
+
+chainlux = Lux.Chain(Lux.Dense(1, 6, Lux.tanh), Lux.Dense(6, 6, Lux.tanh), Lux.Dense(6, 2))
+
+#testing timepoints must match keyword arg `saveat`` timepoints of solve() call
+t1 = collect(Float64, prob.tspan[1]:(1 / 50.0):prob.tspan[2])
+
+# --------------------------------------------------------------------------
+# original paper implementaion
+# 25 points  
+solution1 = solve(prob1, Tsit5(); saveat = 0.2)
+time1 = solution1.t
+physsol1_1 = solution1.u
+u1 = hcat(solution1.u...)
+x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+dataset2_1 = [x1, y1, time1]
+
+plot(time1, u1[1, :])
+plot!(time1, u1[2, :])
+scatter!(dataset2_1[3], dataset2_1[1])
+scatter!(dataset2_1[3], dataset2_1[2])
+
+alg1 = NeuralPDE.BNNODE(chainlux,
+    dataset = dataset2_1,
+    draw_samples = 1000,
+    l2std = [
+        0.01,
+        0.01,
+    ],
+    phystd = [
+        0.01,
+        0.01,
+    ],
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol1flux1_normal = solve(prob1, alg1)
+sol1flux1_normal.estimated_ode_params[1]  #|
+sol1flux2_normal = solve(prob1, alg1)
+sol1flux2_normal.estimated_ode_params[1] #|
+sol1flux3_normal = solve(prob1, alg1)
+sol1flux3_normal.estimated_ode_params[1] #|
+
+# 50 points
+solution1 = solve(prob1, Tsit5(); saveat = 0.05)
+time1 = solution1.t
+physsol1_1 = solution1.u
+u1 = hcat(solution1.u...)
+x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+dataset2_2 = [x1, y1, time1]
+
+alg1 = NeuralPDE.BNNODE(chainlux,
+    dataset = dataset2_2,
+    draw_samples = 1000,
+    l2std = [
+        0.01,
+        0.01,
+    ],
+    phystd = [
+        0.01,
+        0.01,
+    ],
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol1flux11_normal = solve(prob1, alg1)
+sol1flux11_normal.estimated_ode_params[1] #|
+sol1flux22_normal = solve(prob1, alg1)
+sol1flux22_normal.estimated_ode_params[1] #|
+sol1flux33_normal = solve(prob1, alg1)
+sol1flux33_normal.estimated_ode_params[1] #|
+
+# 100 points
+solution = solve(prob1, Tsit5(); saveat = 0.05)
+time1 = solution1.t
+physsol1_1 = solution1.u
+u1 = hcat(solution1.u...)
+x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+dataset2_3 = [x1, y1, time1]
+
+alg1 = NeuralPDE.BNNODE(chainlux,
+    dataset = dataset2_3,
+    draw_samples = 1000,
+    l2std = [
+        0.01,
+        0.01,
+    ],
+    phystd = [
+        0.01,
+        0.01,
+    ],
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol1flux111_normal = solve(prob1, alg1)
+sol1flux111_normal.estimated_ode_params[1] #|
+sol1flux222_normal = solve(prob1, alg1)
+sol1flux222_normal.estimated_ode_params[1] #|
+sol1flux333_normal = solve(prob1, alg1)
+sol1flux333_normal.estimated_ode_params[1] #|
+
+# --------------------------------------------------------------------
+
+# ----------------------------------------------------------
+# Full likelihood no l2 only new L22(NN gradients)
+# 25 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux1_new = solve(prob, alg)
+sol2flux1_new.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} |
+sol2flux2_new = solve(prob, alg)
+sol2flux2_new.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1}   |
+sol2flux3_new = solve(prob, alg)
+sol2flux3_new.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1}  |
+
+# 50 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux11_new = solve(prob, alg)
+sol2flux11_new.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1}   |
+sol2flux22_new = solve(prob, alg)
+sol2flux22_new.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1}   |
+sol2flux33_new = solve(prob, alg)
+sol2flux33_new.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} |
+
+# 100 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux111_new = solve(prob, alg)
+sol2flux111_new.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1}   |
+sol2flux222_new = solve(prob, alg)
+sol2flux222_new.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1}     |
+sol2flux333_new = solve(prob, alg)
+sol2flux333_new.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1}   |
+# ---------------------------------------------------------------------------
+
+# ----------------------------------------------------------
+# Full likelihood  l2 + new L22(NN gradients)
+# 25 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux1_new_all = solve(prob, alg)
+sol2flux1_new_all.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1}  |
+sol2flux2_new_all = solve(prob, alg)
+sol2flux2_new_all.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1}    |
+sol2flux3_new_all = solve(prob, alg)
+sol2flux3_new_all.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1}   |
+
+# 50 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux11_new_all = solve(prob, alg)
+sol2flux11_new_all.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1}   |
+sol2flux22_new_all = solve(prob, alg)
+sol2flux22_new_all.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1}   |
+sol2flux33_new_all = solve(prob, alg)
+sol2flux33_new_all.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} |
+# 100 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux111_new_all = solve(prob, alg)
+sol2flux111_new_all.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1}  |
+sol2flux222_new_all = solve(prob, alg)
+sol2flux222_new_all.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1}    |
+sol2flux333_new_all = solve(prob, alg)
+sol2flux333_new_all.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1}  |
+
+# ---------------------------------------------------------------------------
+
+# ----------------------------------------------------------
+# Full likelihood  l2 + new L22(dataset gradients)
+# 25 points 
+# *1,*2 vs *2.5
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol1flux1_newdata_all = solve(prob, alg)
+sol1flux1_newdata_all.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} |
+sol1flux2_newdata_all = solve(prob, alg)
+sol1flux2_newdata_all.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1}   |
+sol1flux3_newdata_all = solve(prob, alg)
+sol1flux3_newdata_all.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1}  |
+
+# 50 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol1flux11_newdata_all = solve(prob, alg)
+sol1flux11_newdata_all.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1}    |
+sol1flux22_newdata_all = solve(prob, alg)
+sol1flux22_newdata_all.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1}    |
+sol1flux33_newdata_all = solve(prob, alg)
+sol1flux33_newdata_all.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1}   |
+
+# 100 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol1flux111_newdata_all = solve(prob, alg)
+sol1flux111_newdata_all.estimated_ode_params[1]  #|
+sol1flux222_newdata_all = solve(prob, alg)
+sol1flux222_newdata_all.estimated_ode_params[1]  #|
+sol1flux333_newdata_all = solve(prob, alg)
+sol1flux333_newdata_all.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1}   |
+
+# ------------------------------------------------------------------------------------------------------------------------------
+
+# sol2flux111.estimated_ode_params[1]
+# # mine *5
+# 7.03386Particles{Float64, 1}
+# # normal
+# 6.38951Particles{Float64, 1}
+# 6.67657Particles{Float64, 1}
+# # mine *10
+# 7.53672Particles{Float64, 1}
+# # mine *2
+# 6.29005Particles{Float64, 1}
+# 6.29844Particles{Float64, 1}
+
+# # new mine *2
+# 6.39008Particles{Float64, 1}
+# 6.22071Particles{Float64, 1}
+# 6.15611Particles{Float64, 1}
+
+# # new mine *2 tvdiff(smoothu, 20, 0.035, dx = dx, ε = 1e-2)
+# 6.25549Particles{Float64, 1}
+# ----------------------------------------------------------
+
+# ---------------------------------------------------
+
+function calculate_derivatives1(dataset)
+    x̂, time = dataset
+    num_points = length(x̂)
+    # Initialize an array to store the derivative values.
+    derivatives = similar(x̂)
+
+    for i in 2:(num_points - 1)
+        # Calculate the first-order derivative using central differences.
+        Δt_forward = time[i + 1] - time[i]
+        Δt_backward = time[i] - time[i - 1]
+
+        derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
+
+        derivatives[i] = derivative
+    end
+
+    # Derivatives at the endpoints can be calculated using forward or backward differences.
+    derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
+    derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
+    return derivatives
+end
+
+function calculate_derivatives2(dataset)
+    u = dataset[1]
+    t = dataset[2]
+    # control points
+    n = Int(floor(length(t) / 10))
+    # spline for datasetvalues(solution) 
+    # interp = BSplineApprox(u, t, 4, 10, :Uniform, :Uniform)
+    interp = CubicSpline(u, t)
+    # derrivatives interpolation
+    dx = t[2] - t[1]
+    time = collect(t[1]:dx:t[end])
+    smoothu = [interp(i) for i in time]
+    # derivative of the spline (must match function derivative) 
+    û = tvdiff(smoothu, 20, 0.03, dx = dx, ε = 1)
+    # tvdiff(smoothu, 100, 0.1, dx = dx)
+    # 
+    # 
+    # FDM
+    û1 = diff(u) / dx
+    # dataset[1] and smoothu are almost equal(rounding errors)
+    return û, time, smoothu, û1
+end
+
+# need to do this for all datasets
+c = [linear(prob.u0, p, t) for t in dataset3[2]] #ideal case
+b = calculate_derivatives1(dataset2) #central diffs
+# a = calculate_derivatives2(dataset) #tvdiff(smoothu, 100, 0.1, dx = dx)
+d = calculate_derivatives2(dataset1) #tvdiff(smoothu, 20, 0.035, dx = dx, ε = 1e-2)
+d = calculate_derivatives2(dataset2)
+d = calculate_derivatives2(dataset3)
+mean(abs2.(c .- b))
+mean(abs2.(c .- d[1]))
+loss(model, x, y) = mean(abs2.(model(x) .- y));
+scatter!(prob.u0 .+ (prob.tspan[2] .- dataset3[2]) .* chainflux1(dataset3[2]')')
+loss(chainflux1, dataset3[2]', dataset3[1]')
+# mean(abs2.(c[1:24] .- a[4]))
+plot(c, label = "ideal deriv")
+plot!(b, label = "Centraldiff deriv")
+# plot!(a[1], label = "tvdiff(0.1,def) derivatives") 
+plot!(d[1], label = "tvdiff(0.035,20) derivatives")
+plotly()
+
+# GridTraining , NoiseRobustDiff dataset[2][2]-dataset[2][1] l2std
+# 25 points 
+ta = range(tspan[1], tspan[2], length = 25)
+u = [linear_analytic(u0, p, ti) for ti in ta]
+x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+time = vec(collect(Float64, ta))
+dataset = [x̂, time]
+physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+time1 = collect(tspan[1]:(1 / 50.0):tspan[2])
+physsol = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
+plot(physsol, label = "solution")
+
+# plots from 32(deriv)
+# for d
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+    draw_samples = 2000, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+n2_sol2flux1 = solve(prob, alg)
+n2_sol2flux1.estimated_ode_params[1]
+# with extra likelihood 
+# 10.2011Particles{Float64, 1}
+
+# without extra likelihood 
+# 6.25791Particles{Float64, 1}
+# 6.29539Particles{Float64, 1}
+
+plot!(n2_sol2flux1.ensemblesol[1], label = "tvdiff(0.035,1) derivpar")
+plot(dataset[1])
+plot!(physsol1)
+# for a
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+    draw_samples = 2000, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+n2_sol2flux2 = solve(prob, alg)
+n2_sol2flux2.estimated_ode_params[1]
+# with extra likelihood
+# 8.73602Particles{Float64, 1}
+# without extra likelihood
+
+plot!(n2_sol2flux2.ensemblesol[1],
+    label = "tvdiff(0.1,def) derivatives",
+    legend = :outerbottomleft)
+
+# for b
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+    draw_samples = 2000, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+n2_sol2flux3 = solve(prob, alg)
+n2_sol2flux3.estimated_ode_params[1]
+plot!(n2_sol2flux3.ensemblesol[1], label = "Centraldiff deriv")
+
+# for c
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+    draw_samples = 2000, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+n2_sol2flux4 = solve(prob, alg)
+n2_sol2flux4.estimated_ode_params[1]
+plot!(n2_sol2flux4.ensemblesol[1], label = "ideal deriv")
+
+# 50 points 
+
+ta = range(tspan[1], tspan[2], length = 50)
+u = [linear_analytic(u0, p, ti) for ti in ta]
+x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+time = vec(collect(Float64, ta))
+dataset = [x̂, time]
+physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+n2_sol2flux11 = solve(prob, alg)
+n2_sol2flux11.estimated_ode_params[1]
+
+# 5.90049Particles{Float64, 1}
+# 100 points
+ta = range(tspan[1], tspan[2], length = 100)
+u = [linear_analytic(u0, p, ti) for ti in ta]
+x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+time = vec(collect(Float64, ta))
+dataset = [x̂, time]
+physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+n2_sol2flux111 = solve(prob, alg)
+n2_sol2flux111.estimated_ode_params[1]
+plot!(n2_sol2flux111.ensemblesol[1])
+8.88555Particles{Float64, 1}
+
+# 7.15353Particles{Float64, 1}
+# 6.21059 Particles{Float64, 1}
+# 6.31836Particles{Float64, 1}
+0.1 * p
+# ----------------------------------------------------------
+
+# Gives the linear interpolation value at t=3.5
+
+# # Problem 1 with param esimation
+# # dataset 0-1 2 percent noise
+# p = 6.283185307179586
+# # partial_logdensity
+# 6.3549Particles{Float64, 1}
+# # full log_density
+# 6.34667Particles{Float64, 1}
+
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2lux.estimated_ode_params[1]
+
+# # dataset 0-1 20 percent noise
+# # partial log_density
+# 6.30244Particles{Float64, 1}
+# # full log_density
+# 6.24637Particles{Float64, 1}
+
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # dataset 0-2 20percent noise
+# # partial log_density
+# 6.24948Particles{Float64, 1}
+# # full log_density
+# 6.26095Particles{Float64, 1}
+
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# linear_analytic = (u0, p, t) -> u0 + sin(p * t) / (p)
+# linear = (u, p, t) -> cos(p * t)
+# tspan = (0.0, 2.0)
+
+# # dataset 0-1 2 percent noise
+# p = 6.283185307179586
+# # partial_logdensity
+# 6.3549Particles{Float64, 1}
+# # full log_density
+# 6.34667Particles{Float64, 1}
+
+# # dataset 0-1 20 percent noise
+# # partial log_density
+# 6.30244Particles{Float64, 1}
+# # full log_density
+# 6.24637Particles{Float64, 1}
+
+# # dataset 0-2 20percent noise
+# # partial log_density
+# 6.24948Particles{Float64, 1}
+# # full log_density
+# 6.26095Particles{Float64, 1}
+
+# # dataset 0-2 20percent noise 50 points(above all are 100 points)
+# # FuLL log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # partial log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# # i kinda win on 25 points again
+# # dataset 0-2 20percent noise 25 points
+# # FuLL log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # partial log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # i win with 25 points
+# # dataset 0-1 20percent noise 25 points
+# # FuLL log_density
+# sol2flux.estimated_ode_params[1]
+# # new
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # partial log_density
+# sol2flux.estimated_ode_params[1]
+# # New
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # (9,2.5)(above are (9,0.5))
+# # FuLL log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# # just prev was repeat(just change)
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # partial log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # i lose on 0-1,50 points
+# # dataset 0-1 20percent noise 50 points
+# # FuLL log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# # partial log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # (9,2.5) (above are (9,0.5))
+# # FuLL log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # partial log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # ----------------------------------------------------------
+# # Problem 1 with param estimation
+# # physdt=1/20, Full likelihood new 0.5*l2std
+# # 25 points
+# ta = range(tspan[1], tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n05_sol2flux1 = solve(prob, alg)
+# n05_sol2flux1.estimated_ode_params[1] #6.90953 Particles{Float64, 1}
+# n05_sol2flux2 = solve(prob, alg)
+# n05_sol2flux2.estimated_ode_params[1] #6.82374 Particles{Float64, 1}
+# n05_sol2flux3 = solve(prob, alg)
+# n05_sol2flux3.estimated_ode_params[1] #6.84465 Particles{Float64, 1}
+
+# using Plots, StatsPlots
+# plot(n05_sol2flux3.ensemblesol[1])
+# plot!(physsol1)
+# # 50 points
+# ta = range(tspan[1], tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n05_sol2flux11 = solve(prob, alg)
+# n05_sol2flux11.estimated_ode_params[1] #7.0262 Particles{Float64, 1}
+# n05_sol2flux22 = solve(prob, alg)
+# n05_sol2flux22.estimated_ode_params[1] #5.56438 Particles{Float64, 1}
+# n05_sol2flux33 = solve(prob, alg)
+# n05_sol2flux33.estimated_ode_params[1] #7.27189 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n05_sol2flux111 = solve(prob, alg)
+# n05_sol2flux111.estimated_ode_params[1] #6.90549 Particles{Float64, 1}
+# n05_sol2flux222 = solve(prob, alg)
+# n05_sol2flux222.estimated_ode_params[1] #5.42436 Particles{Float64, 1}
+# n05_sol2flux333 = solve(prob, alg)
+# n05_sol2flux333.estimated_ode_params[1] #6.05832 Particles{Float64, 1}
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood new 2*l2std
+# # 25 points
+# ta = range(tspan[1], tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n2_sol2flux1 = solve(prob, alg)
+# n2_sol2flux1.estimated_ode_params[1]#6.9087 Particles{Float64, 1}
+# n2_sol2flux2 = solve(prob, alg)
+# n2_sol2flux2.estimated_ode_params[1]#6.86507 Particles{Float64, 1}
+# n2_sol2flux3 = solve(prob, alg)
+# n2_sol2flux3.estimated_ode_params[1]#6.59206 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(tspan[1], tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n2_sol2flux11 = solve(prob, alg)
+# n2_sol2flux11.estimated_ode_params[1]#7.3715 Particles{Float64, 1}
+# n2_sol2flux22 = solve(prob, alg)
+# n2_sol2flux22.estimated_ode_params[1]#9.84477 Particles{Float64, 1}
+# n2_sol2flux33 = solve(prob, alg)
+# n2_sol2flux33.estimated_ode_params[1]#6.87107 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n2_sol2flux111 = solve(prob, alg)
+# n2_sol2flux111.estimated_ode_params[1]#6.60739 Particles{Float64, 1}
+# n2_sol2flux222 = solve(prob, alg)
+# n2_sol2flux222.estimated_ode_params[1]#7.05923 Particles{Float64, 1}
+# n2_sol2flux333 = solve(prob, alg)
+# n2_sol2flux333.estimated_ode_params[1]#6.5017 Particles{Float64, 1}
+
+# # ----------------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood new all 2*l2std
+# # 25 points
+# ta = range(tspan[1], tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n2all5sol2flux1 = solve(prob, alg)
+# n2all5sol2flux1.estimated_ode_params[1]#11.3659 Particles{Float64, 1}
+# n2all5sol2flux2 = solve(prob, alg)
+# n2all5sol2flux2.estimated_ode_params[1]#6.65634 Particles{Float64, 1}
+# n2all5sol2flux3 = solve(prob, alg)
+# n2all5sol2flux3.estimated_ode_params[1]#6.61905 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(tspan[1], tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n2all5sol2flux11 = solve(prob, alg)
+# n2all5sol2flux11.estimated_ode_params[1]#6.27555 Particles{Float64, 1}
+# n2all5sol2flux22 = solve(prob, alg)
+# n2all5sol2flux22.estimated_ode_params[1]#6.24352 Particles{Float64, 1}
+# n2all5sol2flux33 = solve(prob, alg)
+# n2all5sol2flux33.estimated_ode_params[1]#6.33723 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n2all5sol2flux111 = solve(prob, alg)
+# n2all5sol2flux111.estimated_ode_params[1] #5.95535 Particles{Float64, 1}
+# n2all5sol2flux222 = solve(prob, alg)
+# n2all5sol2flux222.estimated_ode_params[1] #5.98301 Particles{Float64, 1}
+# n2all5sol2flux333 = solve(prob, alg)
+# n2all5sol2flux333.estimated_ode_params[1] #5.9081 Particles{Float64, 1}
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood new all (l2+l22)
+# # 25 points
+# ta = range(tspan[1], tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# nall5sol2flux1 = solve(prob, alg)
+# nall5sol2flux1.estimated_ode_params[1]#6.54705 Particles{Float64, 1}
+# nall5sol2flux2 = solve(prob, alg)
+# nall5sol2flux2.estimated_ode_params[1]#6.6967 Particles{Float64, 1}
+# nall5sol2flux3 = solve(prob, alg)
+# nall5sol2flux3.estimated_ode_params[1]#6.47173 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(tspan[1], tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# nall5sol2flux11 = solve(prob, alg)
+# nall5sol2flux11.estimated_ode_params[1]#6.2113 Particles{Float64, 1}
+# nall5sol2flux22 = solve(prob, alg)
+# nall5sol2flux22.estimated_ode_params[1]#6.10675 Particles{Float64, 1}
+# nall5sol2flux33 = solve(prob, alg)
+# nall5sol2flux33.estimated_ode_params[1]#6.11541 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# nall5sol2flux111 = solve(prob, alg)
+# nall5sol2flux111.estimated_ode_params[1]#6.35224 Particles{Float64, 1}
+# nall5sol2flux222 = solve(prob, alg)
+# nall5sol2flux222.estimated_ode_params[1]#6.40542 Particles{Float64, 1}
+# nall5sol2flux333 = solve(prob, alg)
+# nall5sol2flux333.estimated_ode_params[1]#6.44206 Particles{Float64, 1}
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood new 5* (new only l22 mod)
+# # 25 points
+# ta = range(tspan[1], tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n5sol2flux1 = solve(prob, alg)
+# n5sol2flux1.estimated_ode_params[1]#7.05077 Particles{Float64, 1}
+# n5sol2flux2 = solve(prob, alg)
+# n5sol2flux2.estimated_ode_params[1]#7.07303 Particles{Float64, 1}
+# n5sol2flux3 = solve(prob, alg)
+# n5sol2flux3.estimated_ode_params[1]#5.10622 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(tspan[1], tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n5sol2flux11 = solve(prob, alg)
+# n5sol2flux11.estimated_ode_params[1]#7.39852 Particles{Float64, 1}
+# n5sol2flux22 = solve(prob, alg)
+# n5sol2flux22.estimated_ode_params[1]#7.30319 Particles{Float64, 1}
+# n5sol2flux33 = solve(prob, alg)
+# n5sol2flux33.estimated_ode_params[1]#6.73722 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n5sol2flux111 = solve(prob, alg)
+# n5sol2flux111.estimated_ode_params[1]#7.15996 Particles{Float64, 1}
+# n5sol2flux222 = solve(prob, alg)
+# n5sol2flux222.estimated_ode_params[1]#7.02949 Particles{Float64, 1}
+# n5sol2flux333 = solve(prob, alg)
+# n5sol2flux333.estimated_ode_params[1]#6.9393 Particles{Float64, 1}
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood new
+# # 25 points
+# ta = range(tspan[1], tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# nsol2flux1 = solve(prob, alg)
+# nsol2flux1.estimated_ode_params[1] #5.82707 Particles{Float64, 1}
+# nsol2flux2 = solve(prob, alg)
+# nsol2flux2.estimated_ode_params[1] #4.81534 Particles{Float64, 1}
+# nsol2flux3 = solve(prob, alg)
+# nsol2flux3.estimated_ode_params[1] #5.52965 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(tspan[1], tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# nsol2flux11 = solve(prob, alg)
+# nsol2flux11.estimated_ode_params[1] #7.04027 Particles{Float64, 1}
+# nsol2flux22 = solve(prob, alg)
+# nsol2flux22.estimated_ode_params[1] #7.17588 Particles{Float64, 1}
+# nsol2flux33 = solve(prob, alg)
+# nsol2flux33.estimated_ode_params[1] #6.94495 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# nsol2flux111 = solve(prob, alg)
+# nsol2flux111.estimated_ode_params[1] #6.06608 Particles{Float64, 1}
+# nsol2flux222 = solve(prob, alg)
+# nsol2flux222.estimated_ode_params[1] #6.84726 Particles{Float64, 1}
+# nsol2flux333 = solve(prob, alg)
+# nsol2flux333.estimated_ode_params[1] #6.83463 Particles{Float64, 1}
+
+# # ----------------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood
+# # 25 points
+# ta = range(tspan[1], tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux1 = solve(prob, alg)
+# sol2flux1.estimated_ode_params[1] #6.71397 Particles{Float64, 1} 6.37604 Particles{Float64, 1}
+# sol2flux2 = solve(prob, alg)
+# sol2flux2.estimated_ode_params[1] #6.73509 Particles{Float64, 1} 6.21692 Particles{Float64, 1}
+# sol2flux3 = solve(prob, alg)
+# sol2flux3.estimated_ode_params[1] #6.65453 Particles{Float64, 1} 6.23153 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(tspan[1], tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux11 = solve(prob, alg)
+# sol2flux11.estimated_ode_params[1] #6.23443 Particles{Float64, 1} 6.30635 Particles{Float64, 1}
+# sol2flux22 = solve(prob, alg)
+# sol2flux22.estimated_ode_params[1] #6.18879 Particles{Float64, 1} 6.30099 Particles{Float64, 1}
+# sol2flux33 = solve(prob, alg)
+# sol2flux33.estimated_ode_params[1] #6.22773 Particles{Float64, 1} 6.30671 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux111 = solve(prob, alg)
+# sol2flux111.estimated_ode_params[1] #6.15832 Particles{Float64, 1} 6.35453 Particles{Float64, 1}
+# sol2flux222 = solve(prob, alg)
+# sol2flux222.estimated_ode_params[1] #6.16968 Particles{Float64, 1}6.31125 Particles{Float64, 1}
+# sol2flux333 = solve(prob, alg)
+# sol2flux333.estimated_ode_params[1] #6.12466 Particles{Float64, 1} 6.26514 Particles{Float64, 1}
+
+# # ----------------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, partial likelihood
+# # 25 points
+# ta = range(tspan[1], tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux1_p = solve(prob, alg)
+# sol2flux1_p.estimated_ode_params[1] #5.74065 Particles{Float64, 1} #6.83683 Particles{Float64, 1}
+# sol2flux2_p = solve(prob, alg)
+# sol2flux2_p.estimated_ode_params[1] #9.82504 Particles{Float64, 1} #6.14568 Particles{Float64, 1}
+# sol2flux3_p = solve(prob, alg)
+# sol2flux3_p.estimated_ode_params[1] #5.75075 Particles{Float64, 1} #6.08579 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(tspan[1], tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux11_p = solve(prob, alg)
+# sol2flux11_p.estimated_ode_params[1] #6.19414 Particles{Float64, 1} #6.04621 Particles{Float64, 1}
+# sol2flux22_p = solve(prob, alg)
+# sol2flux22_p.estimated_ode_params[1] #6.15227 Particles{Float64, 1} #6.29086 Particles{Float64, 1}
+# sol2flux33_p = solve(prob, alg)
+# sol2flux33_p.estimated_ode_params[1] #6.19048 Particles{Float64, 1} #6.12516 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux111_p = solve(prob, alg)
+# sol2flux111_p.estimated_ode_params[1] #6.51608 Particles{Float64, 1}# 6.42945Particles{Float64, 1}
+# sol2flux222_p = solve(prob, alg)
+# sol2flux222_p.estimated_ode_params[1] #6.4875 Particles{Float64, 1} # 6.44524Particles{Float64, 1}
+# sol2flux333_p = solve(prob, alg)
+# sol2flux333_p.estimated_ode_params[1] #6.51679 Particles{Float64, 1}# 6.43152Particles{Float64, 1}
+
+# # ---------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood, dataset(1.0-2.0)
+# # 25 points
+# ta = range(1.0, tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1flux1 = solve(prob, alg)
+# sol1flux1.estimated_ode_params[1] #6.35164 Particles{Float64, 1}
+# sol1flux2 = solve(prob, alg)
+# sol1flux2.estimated_ode_params[1] #6.30919 Particles{Float64, 1}
+# sol1flux3 = solve(prob, alg)
+# sol1flux3.estimated_ode_params[1] #6.33554 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(1.0, tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1flux11 = solve(prob, alg)
+# sol1flux11.estimated_ode_params[1] #6.39769 Particles{Float64, 1}
+# sol1flux22 = solve(prob, alg)
+# sol1flux22.estimated_ode_params[1] #6.43924 Particles{Float64, 1}
+# sol1flux33 = solve(prob, alg)
+# sol1flux33.estimated_ode_params[1] #6.4697 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(1.0, tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1flux111 = solve(prob, alg)
+# sol1flux111.estimated_ode_params[1] #6.27812 Particles{Float64, 1}
+# sol1flux222 = solve(prob, alg)
+# sol1flux222.estimated_ode_params[1] #6.19278 Particles{Float64, 1}
+# sol1flux333 = solve(prob, alg)
+# sol1flux333.estimated_ode_params[1] # 9.68244Particles{Float64, 1} (first try) # 6.23969 Particles{Float64, 1}(second try)
+
+# # ----------------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, partial likelihood, dataset(1.0-2.0)
+# # 25 points
+# ta = range(1.0, tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1flux1_p = solve(prob, alg)
+# sol1flux1_p.estimated_ode_params[1]#6.36269 Particles{Float64, 1}
+
+# sol1flux2_p = solve(prob, alg)
+# sol1flux2_p.estimated_ode_params[1]#6.34685 Particles{Float64, 1}
+
+# sol1flux3_p = solve(prob, alg)
+# sol1flux3_p.estimated_ode_params[1]#6.31421 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(1.0, tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1flux11_p = solve(prob, alg)
+# sol1flux11_p.estimated_ode_params[1] #6.15725 Particles{Float64, 1}
+
+# sol1flux22_p = solve(prob, alg)
+# sol1flux22_p.estimated_ode_params[1] #6.18145 Particles{Float64, 1}
+
+# sol1flux33_p = solve(prob, alg)
+# sol1flux33_p.estimated_ode_params[1] #6.21905 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(1.0, tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1flux111_p = solve(prob, alg)
+# sol1flux111_p.estimated_ode_params[1]#6.13481 Particles{Float64, 1}
+
+# sol1flux222_p = solve(prob, alg)
+# sol1flux222_p.estimated_ode_params[1]#9.68555 Particles{Float64, 1}
+
+# sol1flux333_p = solve(prob, alg)
+# sol1flux333_p.estimated_ode_params[1]#6.1477 Particles{Float64, 1}
+
+# # -----------------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, partial likelihood, dataset(1-2), again but different density
+# # 12 points
+# ta = range(1.0, tspan[2], length = 12)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol3flux1_p = solve(prob, alg)
+# sol3flux1_p.estimated_ode_params[1]#6.50048 Particles{Float64, 1}
+# sol3flux2_p = solve(prob, alg)
+# sol3flux2_p.estimated_ode_params[1]#6.57597 Particles{Float64, 1}
+# sol3flux3_p = solve(prob, alg)
+# sol3flux3_p.estimated_ode_params[1]#6.24487 Particles{Float64, 1}
+
+# # 25 points
+# ta = range(1.0, tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol3flux11_p = solve(prob, alg)
+# sol3flux11_p.estimated_ode_params[1]#6.53093 Particles{Float64, 1}
+
+# sol3flux22_p = solve(prob, alg)
+# sol3flux22_p.estimated_ode_params[1]#6.32744 Particles{Float64, 1}
+
+# sol3flux33_p = solve(prob, alg)
+# sol3flux33_p.estimated_ode_params[1]#6.49175 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(1.0, tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol3flux111_p = solve(prob, alg)
+# sol3flux111_p.estimated_ode_params[1]#6.4455 Particles{Float64, 1}
+# sol3flux222_p = solve(prob, alg)
+# sol3flux222_p.estimated_ode_params[1]#6.40736 Particles{Float64, 1}
+# sol3flux333_p = solve(prob, alg)
+# sol3flux333_p.estimated_ode_params[1]#6.46214 Particles{Float64, 1}
+
+# # ---------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, partial likelihood, dataset(0-1)
+# # 25 points
+# ta = range(tspan[1], 1.0, length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol0flux1_p = solve(prob, alg)
+# sol0flux1_p.estimated_ode_params[1]#7.12625 Particles{Float64, 1}
+# sol0flux2_p = solve(prob, alg)
+# sol0flux2_p.estimated_ode_params[1]#8.40948 Particles{Float64, 1}
+# sol0flux3_p = solve(prob, alg)
+# sol0flux3_p.estimated_ode_params[1]#7.18768 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(tspan[1], 1.0, length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol0flux11_p = solve(prob, alg)
+# sol0flux11_p.estimated_ode_params[1]#6.23707 Particles{Float64, 1}
+# sol0flux22_p = solve(prob, alg)
+# sol0flux22_p.estimated_ode_params[1]#6.09728 Particles{Float64, 1}
+# sol0flux33_p = solve(prob, alg)
+# sol0flux33_p.estimated_ode_params[1]#6.12971 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], 1.0, length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol0flux111_p = solve(prob, alg)
+# sol0flux111_p.estimated_ode_params[1]#5.99039 Particles{Float64, 1}
+# sol0flux222_p = solve(prob, alg)
+# sol0flux222_p.estimated_ode_params[1]#5.89609 Particles{Float64, 1}
+# sol0flux333_p = solve(prob, alg)
+# sol0flux333_p.estimated_ode_params[1]#5.91923 Particles{Float64, 1}
+
+# # ---------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood, dataset(1.0-2.0), Normal(12,5) distri prior
+# # 25 points
+# ta = range(1.0, tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 6.0),
+#     param = [Normal(12, 5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1f1 = solve(prob, alg)
+# sol1f1.estimated_ode_params[1]
+# # 10.9818Particles{Float64, 1}
+# sol1f2 = solve(prob, alg)
+# sol1f2.estimated_ode_params[1]
+# # sol1f3 = solve(prob, alg)
+# # sol1f3.estimated_ode_params[1]
+
+# # 50 points
+# ta = range(1.0, tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 6.0),
+#     param = [Normal(12, 5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1f11 = solve(prob, alg)
+# sol1f11.estimated_ode_params[1]
+# sol1f22 = solve(prob, alg)
+# sol1f22.estimated_ode_params[1]
+# # sol1f33 = solve(prob, alg)
+# # sol1f33.estimated_ode_params[1]
+
+# # 100 points
+# ta = range(1.0, tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 6.0),
+#     param = [Normal(12, 5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1f111 = solve(prob, alg)
+# sol1f111.estimated_ode_params[1]
+# sol1f222 = solve(prob, alg)
+# sol1f222.estimated_ode_params[1]
+# # sol1f333 = solve(prob, alg)
+# # sol1f333.estimated_ode_params[1]
+
+# # ----------------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, partial likelihood, dataset(1.0-2.0), Normal(12,5) distri prior
+# # 25 points
+# ta = range(1.0, tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [Normal(12, 5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1f1_p = solve(prob, alg)
+# sol1f1_p.estimated_ode_params[1]
+# sol1f2_p = solve(prob, alg)
+# sol1f2_p.estimated_ode_params[1]
+# sol1f3_p = solve(prob, alg)
+# sol1f3_p.estimated_ode_params[1]
+
+# # 50 points
+# ta = range(1.0, tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [Normal(12, 5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1f11_p = solve(prob, alg)
+# sol1f11_p.estimated_ode_params[1]
+# sol1f22_p = solve(prob, alg)
+# sol1f22_p.estimated_ode_params[1]
+# sol1f33_p = solve(prob, alg)
+# sol1f33_p.estimated_ode_params[1]
+
+# # 100 points
+# ta = range(1.0, tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [Normal(12, 5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1f111_p = solve(prob, alg)
+# sol1f111_p.estimated_ode_params[1]
+# sol1f222_p = solve(prob, alg)
+# sol1f222_p.estimated_ode_params[1]
+# sol1f333_p = solve(prob, alg)
+# sol1f333_p.estimated_ode_params[1]
+
+# # ----------------------------------------------------------
+
+# plot!(title = "9,2.5 50 training 2>full,1>partial")
+
+# p
+# param1
+# # (lux chain)
+# @prob mean(abs.(physsol2 .- sol3lux_pestim.ensemblesol[1])) < 8e-2
+
+# # estimated parameters(lux chain)
+# param1 = sol3lux_pestim.estimated_ode_params[1]
+# @test abs(param1 - p) < abs(0.35 * p)
+
+# p
+# param1
+
+# # # my suggested Loss likelihood part
+# # #  + L2loss2(Tar, θ)
+# # # My suggested extra loss function
+# # function L2loss2(Tar::LogTargetDensity, θ)
+# #     f = Tar.prob.f
+
+# #     # parameter estimation chosen or not
+# #     if Tar.extraparams > 0
+# #         dataset = Tar.dataset
+
+# #         # Timepoints to enforce Physics
+# #         dataset = Array(reduce(hcat, dataset)')
+# #         t = dataset[end, :]
+# #         û = dataset[1:(end - 1), :]
+
+# #         ode_params = Tar.extraparams == 1 ?
+# #                      θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
+# #                      θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
+
+# #         if length(û[:, 1]) == 1
+# #             physsol = [f(û[:, i][1],
+# #                 ode_params,
+# #                 t[i])
+# #                        for i in 1:length(û[1, :])]
+# #         else
+# #             physsol = [f(û[:, i],
+# #                 ode_params,
+# #                 t[i])
+# #                        for i in 1:length(û[1, :])]
+# #         end
+# #         #form of NN output matrix output dim x n
+# #         deri_physsol = reduce(hcat, physsol)
+
+# #         #   OG deriv(basically gradient matching in case of an ODEFunction)
+# #         # in case of PDE or general ODE we would want to reduce residue of f(du,u,p,t)
+# #         # if length(û[:, 1]) == 1
+# #         #     deri_sol = [f(û[:, i][1],
+# #         #         Tar.prob.p,
+# #         #         t[i])
+# #         #                 for i in 1:length(û[1, :])]
+# #         # else
+# #         #     deri_sol = [f(û[:, i],
+# #         #         Tar.prob.p,
+# #         #         t[i])
+# #         #                 for i in 1:length(û[1, :])]
+# #         # end
+# #         # deri_sol = reduce(hcat, deri_sol)
+# #         derivatives = calculate_derivatives(Tar.dataset)
+# #         deri_sol = reduce(hcat, derivatives)
+
+# #         physlogprob = 0
+# #         for i in 1:length(Tar.prob.u0)
+# #             # can add phystd[i] for u[i]
+# #             physlogprob += logpdf(MvNormal(deri_physsol[i, :],
+# #                     LinearAlgebra.Diagonal(map(abs2,
+# #                         Tar.l2std[i] .*
+# #                         ones(length(deri_sol[i, :]))))),
+# #                 deri_sol[i, :])
+# #         end
+# #         return physlogprob
+# #     else
+# #         return 0
+# #     end
+# # end
+
+# # function calculate_derivatives(dataset)
+# #     x̂, time = dataset
+# #     num_points = length(x̂)
+
+# #     # Initialize an array to store the derivative values.
+# #     derivatives = similar(x̂)
+
+# #     for i in 2:(num_points - 1)
+# #         # Calculate the first-order derivative using central differences.
+# #         Δt_forward = time[i + 1] - time[i]
+# #         Δt_backward = time[i] - time[i - 1]
+
+# #         derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
+
+# #         derivatives[i] = derivative
+# #     end
+
+# #     # Derivatives at the endpoints can be calculated using forward or backward differences.
+# #     derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
+# #     derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
+
+# #     return derivatives
+# # end
+
+# size(dataset[1])
+# # Problem 1 with param estimation(flux,lux)
+# # Normal
+# # 6.20311 Particles{Float64, 1},6.21746Particles{Float64, 1}
+# # better
+# # 6.29093Particles{Float64, 1}, 6.27925Particles{Float64, 1}
+# # Non ideal case
+# # 6.14861Particles{Float64, 1}, 
+# sol2flux.estimated_ode_params
+# sol2lux.estimated_ode_params[1]
+# p
+# size(sol3flux_pestim.ensemblesol[2])
+# plott = sol3flux_pestim.ensemblesol[1]
+# using StatsPlots
+# plotly()
+# plot(t, sol3flux_pestim.ensemblesol[1])
+
+# function calculate_derivatives(dataset)
+#     x̂, time = dataset
+#     num_points = length(x̂)
+
+#     # Initialize an array to store the derivative values.
+#     derivatives = similar(x̂)
+
+#     for i in 2:(num_points - 1)
+#         # Calculate the first-order derivative using central differences.
+#         Δt_forward = time[i + 1] - time[i]
+#         Δt_backward = time[i] - time[i - 1]
+
+#         derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
+
+#         derivatives[i] = derivative
+#     end
+
+#     # Derivatives at the endpoints can be calculated using forward or backward differences.
+#     derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
+#     derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
+
+#     return derivatives
+# end
+
+# # Example usage:
+# # dataset = [x̂, time]
+# derivatives = calculate_derivatives(dataset)
+# dataset[1]
+# # Access derivative values at specific time points as needed.
+
+# # # 9,0.5
+# # 0.09894916260292887
+# # 0.09870335436072103
+# # 0.08398556878067913
+# # 0.10109070099105527
+# # 0.09122683737517055
+# # 0.08614958011892977
+# # mean(abs.(x̂ .- meanscurve1)) #0.017112298305523976
+# # mean(abs.(physsol1 .- meanscurve1)) #0.004038636894341354
+# # # 9,4(little worse)
+# # mean(abs.(x̂ .- meanscurve1))#0.01800876370000113
+# # mean(abs.(physsol1 .- meanscurve1))#0.007285681280600875
+# # # 30,30
+# # mean(abs.(x̂ .- meanscurve1)) #0.10599926120358046
+# # mean(abs.(physsol1 .- meanscurve1)) #0.10375554193397989
+# # # 30,0.5
+# # mean(abs.(x̂ .- meanscurve1)) #0.10160824458252521
+# # mean(abs.(physsol1 .- meanscurve1)) #0.09999942538357891
+
+# # # ------------------------------------------------normale
+# # # 9,0.5
+# # mean(abs.(x̂ .- meanscurve1)) #0.0333356493928835
+# # mean(abs.(physsol1 .- meanscurve1)) #0.02721733876400459
+# # # 9,4(little worse)
+# # mean(abs.(x̂ .- meanscurve1)) #0.020734206709433347
+# # mean(abs.(physsol1 .- meanscurve1)) #0.012502850740700212
+# # # 30,30
+# # mean(abs.(x̂ .- meanscurve1)) #0.10615859683094729
+# # mean(abs.(physsol1 .- meanscurve1)) #0.10508141153722575
+# # # 30,0.5
+# # mean(abs.(x̂ .- meanscurve1)) #0.10833514946031565
+# # mean(abs.(physsol1 .- meanscurve1)) #0.10668470203219232
+
+# # # 9,0.5
+# # 10.158108285475553
+# # 10.207234384538026
+# # 10.215000657664852
+# # 10.213817644016174
+# # 13.380030074088719
+# # 13.348906350967326
+
+# # 6.952731422892041
+
+# # # All losses
+# # 10.161478523326277
+# # # L2 losses 1
+# # 9.33312996960278
+# # # L2 losses 2
+# # 10.217417241370631
+
+# # mean([fhsamples1[i][26] for i in 500:1000]) #6.245045767509431
+# # p #6.283185307179586
+# # # 9,4
+# # mean([fhsamples1[i][23] for i in 500:1000]) #6.212522300650451
+# # # 30,30
+# # mean([fhsamples1[i][23] for i in 500:1000]) #35.328636809737695
+# # # 30,0.5
+# # mean([fhsamples1[i][23] for i in 500:1000]) #35.232963812125654
+
+# # # ---------------------------------------normale
+# # # 9,0.5
+# # mean([fhsamples1[i][23] for i in 500:1000]) #6.547771572198114
+# # p #6.283185307179586
+# # # 9,4
+# # mean([fhsamples1[i][23] for i in 500:1000]) #6.158906185002702
+# # # 30,30
+# # mean([fhsamples1[i][23] for i in 500:1000]) #29.210400972620185
+# # # 30,0.5
+# # mean([fhsamples1[i][23] for i in 500:1000]) #29.153845019454522
+
+# # # ----------------more dataset normale -----------------------------
+# # # 9,0.5
+# # mean([fhsamples1[i][23] for i in 500:1000]) #6.271141178216537
+# # p #6.283185307179586
+# # # 9,4
+# # mean([fhsamples1[i][23] for i in 500:1000]) #6.241144692919369
+# # # 30,30
+# # mean([fhsamples1[i][23] for i in 500:1000]) #29.124480447973127
+# # # 30,0.5
+# # mean([fhsamples1[i][23] for i in 500:1000]) #29.07838011629903
+
+# # # 9,0.5
+# # mean(abs.(x̂ .- meanscurve1)) #0.016551602015599295
+# # mean(abs.(physsol1 .- meanscurve1)) #0.0021488618484224245
+# # # 9,4(little worse)
+# # mean(abs.(x̂ .- meanscurve1)) #0.017022725082640747
+# # mean(abs.(physsol1 .- meanscurve1)) #0.004339761917100232
+# # # 30,30
+# # mean(abs.(x̂ .- meanscurve1)) #0.09668785317864312
+# # mean(abs.(physsol1 .- meanscurve1)) #0.09430712337543362
+# # # 30,0.5
+# # mean(abs.(x̂ .- meanscurve1)) #0.09958118358974392
+# # mean(abs.(physsol1 .- meanscurve1)) #0.09717454226368502
+
+# # # ----------------more dataset special -----------------------------
+# # # 9,0.5
+# # mean([fhsamples1[i][23] for i in 500:1000]) #6.284355334485365
+# # p #6.283185307179586
+# # # 9,4
+# # mean([fhsamples1[i][23] for i in 500:1000]) #6.259238106698602
+# # # 30,30
+# # mean([fhsamples1[i][23] for i in 500:1000]) #29.139808934336987
+# # # 30,0.5
+# # mean([fhsamples1[i][23] for i in 500:1000]) #29.03921327641226
+
+# # # 9,0.5
+# # mean(abs.(x̂ .- meanscurve1)) #0.016627231605546876
+# # mean(abs.(physsol1 .- meanscurve1)) #0.0020311429130039564
+# # # 9,4(little worse)
+# # mean(abs.(x̂ .- meanscurve1)) #0.016650324577507352
+# # mean(abs.(physsol1 .- meanscurve1)) #0.0027537543411154677
+# # # 30,30
+# # mean(abs.(x̂ .- meanscurve1)) #0.09713187937270151
+# # mean(abs.(physsol1 .- meanscurve1)) #0.09317278450371556
+# # # 30,0.5
+# # mean(abs.(x̂ .- meanscurve1)) #0.09550234866855814
+# # mean(abs.(physsol1 .- meanscurve1)) #0.09317278450371556
+
+# # using Plots, StatsPlots
+# # plotly()
+
+# # ---------------------------------------------------------
+# # # # Distribution abstract in wrapper, dataset Float64
+# # # 268.651 s (206393690 allocations: 388.71 GiB)
+# # # 318.170551 seconds (206.29 M allocations: 388.453 GiB, 20.83% gc time)
+
+# # # # Above with dataset Real subtype
+# # # 326.201 s (206327409 allocations: 388.42 GiB)
+# # # 363.189370 seconds (206.25 M allocations: 387.975 GiB, 15.77% gc time)
+# # # 306.171 s (206321277 allocations: 388.55 GiB)
+# # # 356.180699 seconds (206.43 M allocations: 388.361 GiB, 13.77% gc time)
+
+# # # # Above with dataset AbstractFloat subtype
+# # # 290.751187 seconds (205.94 M allocations: 387.955 GiB, 12.92% gc time)
+# # # 296.319815 seconds (206.38 M allocations: 388.730 GiB, 12.69% gc time)
+
+# # # # ODEProblem float64 dtaset and vector distri inside
+# # #   273.169 s (206128318 allocations: 388.40 GiB)
+# # #   274.059531 seconds (205.91 M allocations: 387.953 GiB, 12.77% gc time)
+
+# # # #   Dataset float64 inside and vector distri outsude
+# # #   333.603 s (206251143 allocations: 388.41 GiB)
+# # # 373.377222 seconds (206.11 M allocations: 387.968 GiB, 13.25% gc time)
+# # #   359.745 s (206348301 allocations: 388.41 GiB)
+# # # 357.813114 seconds (206.31 M allocations: 388.354 GiB, 13.54% gc time)
+
+# # # # Dataset float64 inside and vector distri inside
+# # #   326.437 s (206253571 allocations: 388.41 GiB)
+# # #   290.334083 seconds (205.92 M allocations: 387.954 GiB, 13.82% gc time)
+
+# # # # current setting
+# # # 451.304 s (206476927 allocations: 388.43 GiB)
+# # # 384.532732 seconds (206.22 M allocations: 387.976 GiB, 13.17% gc time)
+# # # 310.223 s (206332558 allocations: 388.63 GiB)
+# # # 344.243889 seconds (206.34 M allocations: 388.409 GiB, 13.84% gc time)
+# # # 357.457737 seconds (206.66 M allocations: 389.064 GiB, 18.16% gc time)
+
+# # # # shit setup
+# # #   325.595 s (206283732 allocations: 388.41 GiB)
+# # # 334.248753 seconds (206.06 M allocations: 387.964 GiB, 12.60% gc time)
+# # #   326.011 s (206370857 allocations: 388.56 GiB)
+# # # 327.203339 seconds (206.29 M allocations: 388.405 GiB, 12.92% gc time)
+
+# # # # in wrapper Distribution prior, insiade FLOAT64 DATASET
+# # # 325.158167 seconds (205.97 M allocations: 387.958 GiB, 15.07% gc time) 
+# # #   429.536 s (206476324 allocations: 388.43 GiB)
+# # #   527.364 s (206740343 allocations: 388.58 GiB)
+
+# # # #   wrapper Distribtuion, inside Float64
+# # # 326.017 s (206037971 allocations: 387.96 GiB)
+# # # 347.424730 seconds (206.45 M allocations: 388.532 GiB, 12.92% gc time)
+
+# # # 439.047568 seconds (284.24 M allocations: 392.598 GiB, 15.25% gc time, 14.36% compilation time: 0% of which was recompilation)
+# # # 375.472142 seconds (206.40 M allocations: 388.529 GiB, 14.93% gc time)
+# # # 374.888820 seconds (206.34 M allocations: 388.346 GiB, 14.09% gc time)
+# # # 363.719611 seconds (206.39 M allocations: 388.581 GiB, 15.08% gc time)
+# # # # inside Distribtion, instide Float64
+# # #   310.238 s (206324249 allocations: 388.53 GiB)
+# # #   308.991494 seconds (206.34 M allocations: 388.549 GiB, 14.01% gc time)
+# # #   337.442 s (206280712 allocations: 388.36 GiB)
+# # #   299.983096 seconds (206.29 M allocations: 388.512 GiB, 17.14% gc time)
+
+# # #   394.924357 seconds (206.27 M allocations: 388.337 GiB, 23.68% gc time)
+# # # 438.204179 seconds (206.39 M allocations: 388.470 GiB, 23.84% gc time)
+# # #   376.626914 seconds (206.46 M allocations: 388.693 GiB, 18.72% gc time)
+# # # 286.863795 seconds (206.14 M allocations: 388.370 GiB, 18.80% gc time)
+# # #   285.556929 seconds (206.22 M allocations: 388.371 GiB, 17.04% gc time)
+# # #   291.471662 seconds (205.96 M allocations: 388.068 GiB, 19.85% gc time)
+
+# # # 495.814341 seconds (284.62 M allocations: 392.622 GiB, 12.56% gc time, 10.96% compilation time: 0% of which was recompilation)
+# # # 361.530617 seconds (206.36 M allocations: 388.526 GiB, 14.98% gc time)
+# # # 348.576065 seconds (206.22 M allocations: 388.337 GiB, 15.01% gc time)
+# # # 374.575609 seconds (206.45 M allocations: 388.586 GiB, 14.65% gc time)
+# # # 314.223008 seconds (206.23 M allocations: 388.411 GiB, 14.63% gc time)
+
+# # PROBLEM-3 LOTKA VOLTERRA EXAMPLE [WIP] (WITH PARAMETER ESTIMATION)(will be put in tutorial page)
+# function lotka_volterra(u, p, t)
+#     # Model parameters.
+#     α, β, γ, δ = p
+#     # Current state.
+#     x, y = u
+
+#     # Evaluate differential equations.
+#     dx = (α - β * y) * x # prey
+#     dy = (δ * x - γ) * y # predator
+
+#     return [dx, dy]
+# end
+
+# u0 = [1.0, 1.0]
+# p = [1.5, 1.0, 3.0, 1.0]
+# tspan = (0.0, 6.0)
+# prob = ODEProblem(lotka_volterra, u0, tspan, p)
+# solution = solve(prob, Tsit5(); saveat = 0.05)
+
+# as = reduce(hcat, solution.u)
+# as[1, :]
+# # Plot simulation.
+# time = solution.t
+# u = hcat(solution.u...)
+# # BPINN AND TRAINING DATASET CREATION, NN create, Reconstruct
+# x = u[1, :] + 0.5 * randn(length(u[1, :]))
+# y = u[2, :] + 0.5 * randn(length(u[1, :]))
+# dataset = [x[1:50], y[1:50], time[1:50]]
+# # scatter!(time, [x, y])
+# # scatter!(dataset[3], [dataset[2], dataset[1]])
+
+# # NN has 2 outputs as u -> [dx,dy]
+# chainlux1 = Lux.Chain(Lux.Dense(1, 6, Lux.tanh), Lux.Dense(6, 6, Lux.tanh),
+#     Lux.Dense(6, 2))
+# chainflux1 = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 6, tanh), Flux.Dense(6, 2))
+
+# # fh_mcmc_chainflux1, fhsamplesflux1, fhstatsflux1 = ahmc_bayesian_pinn_ode(prob, chainflux1,
+# #                                                                           dataset = dataset,
+# #                                                                           draw_samples = 1000,
+# #                                                                           l2std = [
+# #                                                                               0.05,
+# #                                                                               0.05,
+# #                                                                           ],
+# #                                                                           phystd = [
+# #                                                                               0.05,
+# #                                                                               0.05,
+# #                                                                           ],
+# #                                                                           priorsNNw = (0.0,
+# #          
+
+# #   3.0))
+
+# # check if NN output is more than 1
+# # numoutput = size(luxar[1])[1]
+# # if numoutput > 1
+# #     # Initialize a vector to store the separated outputs for each output dimension
+# #     output_matrices = [Vector{Vector{Float32}}() for _ in 1:numoutput]
+
+# #     # Loop through each element in the `as` vector
+# #     for element in as
+# #         for i in 1:numoutput
+# #             push!(output_matrices[i], element[i, :])  # Append the i-th output (i-th row) to the i-th output_matrices
+# #         end
+# #     end
+
+# #     ensemblecurves = Vector{}[]
+# #     for r in 1:numoutput
+# #         br = hcat(output_matrices[r]...)'
+# #         ensemblecurve = prob.u0[r] .+
+# #                         [Particles(br[:, i]) for i in 1:length(t)] .*
+# #                         (t .- prob.tspan[1])
+# #         push!(ensemblecurves, ensemblecurve)
+# #     end
+
+# # else
+# #     # ensemblecurve = prob.u0 .+
+# #     #                 [Particles(reduce(vcat, luxar)[:, i]) for i in 1:length(t)] .*
+# #     #                 (t .- prob.tspan[1])
+# #     print("yuh")
+# # end
+
+# # fhsamplesflux2
+# # nnparams = length(init1)
+# # estimnnparams = [Particles(reduce(hcat, fhsamplesflux2)[i, :]) for i in 1:nnparams]
+# # ninv=4
+# # estimated_params = [Particles(reduce(hcat, fhsamplesflux2[(end - ninv + 1):end])[i, :])
+# #                     for i in (nnparams + 1):(nnparams + ninv)]
+# # output_matrices[r]
+# # br = hcat(output_matrices[r]...)'
+
+# # br[:, 1]
+
+# # [Particles(br[:, i]) for i in 1:length(t)]
+# # prob.u0
+# # [Particles(br[:, i]) for i in 1:length(t)] .*
+# # (t .- prob.tspan[1])
+
+# # ensemblecurve = prob.u0[r] .+
+# #                 [Particles(br[:, i]) for i in 1:length(t)] .*
+# #                 (t .- prob.tspan[1])
+# # push!(ensemblecurves, ensemblecurve)
+
+# using StatsPlots
+# plotly()
+# plot(t, ensemblecurve)
+# plot(t, ensemblecurves[1])
+# plot!(t, ensemblecurves[2])
+# ensemblecurve
+# ensemblecurves[1]
+# fh_mcmc_chainflux2, fhsamplesflux2, fhstatsflux2 = ahmc_bayesian_pinn_ode(prob, chainflux1,
+#     dataset = dataset,
+#     draw_samples = 1000,
+#     l2std = [
+#         0.05,
+#         0.05,
+#     ],
+#     phystd = [
+#         0.05,
+#         0.05,
+#     ],
+#     priorsNNw = (0.0,
+#         3.0),
+#     param = [
+#         Normal(1.5,
+#             0.5),
+#         Normal(1.2,
+#             0.5),
+#         Normal(3.3,
+#             0.5),
+#         Normal(1.4,
+#             0.5),
+#     ], progress = true)
+
+# alg = NeuralPDE.BNNODE(chainflux1,
+#     dataset = dataset,
+#     draw_samples = 1000,
+#     l2std = [
+#         0.05,
+#         0.05,
+#     ],
+#     phystd = [
+#         0.05,
+#         0.05,
+#     ],
+#     priorsNNw = (0.0,
+#         3.0),
+#     param = [
+#         Normal(4.5,
+#             5),
+#         Normal(7,
+#             2),
+#         Normal(5,
+#             2),
+#         Normal(-4,
+#             6),
+#     ],
+#     n_leapfrog = 30, progress = true)
+
+# sol3flux_pestim = solve(prob, alg)
+
+# # OG PARAM VALUES
+# [1.5, 1.0, 3.0, 1.0]
+# # less
+# # [1.34, 7.51, 2.54, -2.55]
+# # better
+# # [1.48, 0.993, 2.77, 0.954]
+
+# sol3flux_pestim.es
+# sol3flux_pestim.estimated_ode_params
+# # fh_mcmc_chainlux1, fhsampleslux1, fhstatslux1 = ahmc_bayesian_pinn_ode(prob, chainlux1,
+# #                                                                        dataset = dataset,
+# #                                                                        draw_samples = 1000,
+# #                                                                        l2std = [0.05, 0.05],
+# #                                                                        phystd = [
+# #                                                                            0.05,
+# #                                                                            0.05,
+# #                                                                        ],
+# #                                                                        priorsNNw = (0.0,
+# #                                                                                     3.0))
+
+# # fh_mcmc_chainlux2, fhsampleslux2, fhstatslux2 = ahmc_bayesian_pinn_ode(prob, chainlux1,
+# #                                                                        dataset = dataset,
+# #                                                                        draw_samples = 1000,
+# #                                                                        l2std = [0.05, 0.05],
+# #                                                                        phystd = [
+# #                                                                            0.05,
+# #                                                                            0.05,
+# #                                                                        ],
+# #                                                                        priorsNNw = (0.0,
+# #                                                                                     3.0),
+# #                                                                        param = [
+# #                                                                            Normal(1.5, 0.5),
+# #                                                                            Normal(1.2, 0.5),
+# #                                                                            Normal(3.3, 0.5),
+# #                                                                            Normal(1.4, 0.5),
+# #                                                                        ])
+
+# init1, re1 = destructure(chainflux1)
+# θinit, st = Lux.setup(Random.default_rng(), chainlux1)
+# #   PLOT testing points
+# t = time
+# p = prob.p
+# collect(Float64, vcat(ComponentArrays.ComponentArray(θinit)))
+# collect(Float64, ComponentArrays.ComponentArray(θinit))
+# # Mean of last 1000 sampled parameter's curves(flux and lux chains)[Ensemble predictions]
+# out = re1.([fhsamplesflux1[i][1:68] for i in 500:1000])
+# yu = [out[i](t') for i in eachindex(out)]
+
+# function getensemble(yu, num_models)
+#     num_rows, num_cols = size(yu[1])
+#     row_means = zeros(Float32, num_rows, num_cols)
+#     for i in 1:num_models
+#         row_means .+= yu[i]
+#     end
+#     row_means ./ num_models
+# end
+# fluxmean = getensemble(yu, length(out))
+# meanscurve1_1 = prob.u0 .+ (t' .- prob.tspan[1]) .* fluxmean
+# mean(abs.(u .- meanscurve1_1))
+
+# plot!(t, physsol1)
+# @test mean(abs2.(x̂ .- meanscurve1_1)) < 2e-2
+# @test mean(abs2.(physsol1 .- meanscurve1)) < 2e-2
+# @test mean(abs2.(x̂ .- meanscurve2)) < 3e-3
+# @test mean(abs2.(physsol1 .- meanscurve2)) < 2e-3
+
+# out = re1.([fhsamplesflux2[i][1:68] for i in 500:1000])
+# yu = collect(out[i](t') for i in eachindex(out))
+# fluxmean = getensemble(yu, length(out))
+# meanscurve1_2 = prob.u0 .+ (t' .- prob.tspan[1]) .* fluxmean
+# mean(abs.(u .- meanscurve1_2))
+
+# @test mean(abs2.(x̂ .- meanscurve1)) < 2e-2
+# @test mean(abs2.(physsol1 .- meanscurve1)) < 2e-2
+# @test mean(abs2.(x̂ .- meanscurve2)) < 3e-3
+# @test mean(abs2.(physsol1 .- meanscurve2)) < 2e-3
+
+# θ = [vector_to_parameters(fhsampleslux1[i][1:(end - 4)], θinit) for i in 500:1000]
+# luxar = [chainlux1(t', θ[i], st)[1] for i in 1:500]
+# luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
+# meanscurve2_1 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
+
+# @test mean(abs2.(x̂ .- meanscurve1)) < 2e-2
+# @test mean(abs2.(physsol1 .- meanscurve1)) < 2e-2
+# @test mean(abs2.(x̂ .- meanscurve2)) < 3e-3
+# @test mean(abs2.(physsol1 .- meanscurve2)) < 2e-3
+
+# θ = [vector_to_parameters(fhsampleslux2[i][1:(end - 4)], θinit) for i in 500:1000]
+# luxar = [chainlux1(t', θ[i], st)[1] for i in 1:500]
+# luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
+# meanscurve2_2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
+
+# @test mean(abs2.(x̂ .- meanscurve1)) < 2e-2
+# @test mean(abs2.(physsol1 .- meanscurve1)) < 2e-2
+# @test mean(abs2.(x̂ .- meanscurve2)) < 3e-3
+# @test mean(abs2.(physsol1 .- meanscurve2)) < 2e-3
+
+# # # ESTIMATED ODE PARAMETERS (NN1 AND NN2)
+# @test abs(p - mean([fhsamplesflux2[i][69] for i in 500:1000])) < 0.1 * p[1]
+# @test abs(p - mean([fhsampleslux2[i][69] for i in 500:1000])) < 0.2 * p[1]
+
+# # @test abs(p - mean([fhsamplesflux2[i][70] for i in 500:1000])) < 0.1 * p[2]
+# # @test abs(p - mean([fhsampleslux2[i][70] for i in 500:1000])) < 0.2 * p[2]
+
+# # @test abs(p - mean([fhsamplesflux2[i][71] for i in 500:1000])) < 0.1 * p[3]
+# # @test abs(p - mean([fhsampleslux2[i][71] for i in 500:1000])) < 0.2 * p[3]
+
+# # @test abs(p - mean([fhsamplesflux2[i][72] for i in 500:1000])) < 0.1 * p[4]
+# # @test abs(p - mean([fhsampleslux2[i][72] for i in 500:1000])) < 0.2 * p[4]
+
+# # fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chainlux1,
+# #                                                               dataset = dataset,
+# #                                                               draw_samples = 1000,
+# #                                                               l2std = [0.05, 0.05],
+# #                                                               phystd = [0.05, 0.05],
+# #                                                               priorsNNw = (0.0, 3.0),
+# #                                                               param = [
+# #                                                                   Normal(1.5, 0.5),
+# #                                                                   Normal(1.2, 0.5),
+# #                                                                   Normal(3.3, 0.5),
+# #                                                                   Normal(1.4, 0.5),
+# #                                                               ], autodiff = true)
+
+# # fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chainlux1,
+# #                                                               dataset = dataset,
+# #                                                               draw_samples = 1000,
+# #                                                               l2std = [0.05, 0.05],
+# #                                                               phystd = [0.05, 0.05],
+# #                                                               priorsNNw = (0.0, 3.0),
+# #                                                               param = [
+# #                                                                   Normal(1.5, 0.5),
+# #                                                                   Normal(1.2, 0.5),
+# #                                                                   Normal(3.3, 0.5),
+# #                                                                   Normal(1.4, 0.5),
+# #                                                               ], nchains = 2)
+
+# # NOTES (WILL CLEAR LATER)
+# # --------------------------------------------------------------------------------------------
+# # Hamiltonian energy must be lowest(more paramters the better is it to map onto them)
+# # full better than L2 and phy individual(test)
+# # in mergephys more points after training points is better from 20->40
+# # does consecutive runs bceome better? why?(plot 172)(same chain maybe)
+# # does density of points in timespan matter dataset vs internal timespan?(plot 172)(100+0.01)
+# # when training from 0-1 and phys from 1-5 with 1/150 simple nn slow,but bigger nn faster decrease in Hmailtonian
+# # bigger time interval more curves to adapt to only more parameters adapt to that, better NN architecture
+# # higher order logproblems solve better
+# # repl up up are same instances? but reexecute calls are new?
+
+# #Compare results against paper example
+# # Lux chains support (DONE)
+# # fix predictions for odes depending upon 1,p in f(u,p,t)(DONE)
+# # lotka volterra learn curve beyond l2 losses(L2 losses determine accuracy of parameters)(parameters cant run free ∴ L2 interval only)
+# # check if prameters estimation works(YES)
+# # lotka volterra parameters estimate (DONE)
+
+# using NeuralPDE, Lux, Flux, Optimization, OptimizationOptimJL
+# import ModelingToolkit: Interval
+# using Plots, StatsPlots
+# plotly()
+# # Profile.init()
+
+# @parameters x y
+# @variables u(..)
+# Dxx = Differential(x)^2
+# Dyy = Differential(y)^2
+
+# # 2D PDE
+# eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y)
+
+# # Boundary conditions
+# bcs = [u(0, y) ~ 0.0, u(1, y) ~ 0.0,
+#     u(x, 0) ~ 0.0, u(x, 1) ~ 0.0]
+# # Space and time domains
+# domains = [x ∈ Interval(0.0, 1.0),
+#     y ∈ Interval(0.0, 1.0)]
+
+# # Neural network
+# dim = 2 # number of dimensions
+# chain = Flux.Chain(Flux.Dense(dim, 16, Lux.σ), Flux.Dense(16, 16, Lux.σ), Flux.Dense(16, 1))
+# θ, re = destructure(chain)
+# # Discretization
+# dx = 0.05
+# discretization = PhysicsInformedNN(chain, GridTraining(dx))
+
+# @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
+
+# pinnrep = symbolic_discretize(pde_system, discretization)
+# typeof(pinnrep.phi)
+# typeof(pinnrep.phi)
+# typeof(re)
+# pinnrep.phi([1, 2], θ)
+
+# typeof(θ)
+
+# print(pinnrep)
+# pinnrep.eqs
+# pinnrep.bcs
+# pinnrep.domains
+# pinnrep.eq_params
+# pinnrep.defaults
+# print(pinnrep.default_p)
+# pinnrep.param_estim
+# print(pinnrep.additional_loss)
+# pinnrep.adaloss
+# pinnrep.depvars
+# pinnrep.indvars
+# pinnrep.dict_depvar_input
+# pinnrep.dict_depvars
+# pinnrep.dict_indvars
+# print(pinnrep.logger)
+# pinnrep.multioutput
+# pinnrep.iteration
+# pinnrep.init_params
+# pinnrep.flat_init_params
+# pinnrep.phi
+# pinnrep.derivative
+# pinnrep.strategy
+# pinnrep.pde_indvars
+# pinnrep.bc_indvars
+# pinnrep.pde_integration_vars
+# pinnrep.bc_integration_vars
+# pinnrep.integral
+# pinnrep.symbolic_pde_loss_functions
+# pinnrep.symbolic_bc_loss_functions
+# pinnrep.loss_functions
+
+# #  = discretize(pde_system, discretization)
+# prob = symbolic_discretize(pde_system, discretization)
+# # "The boundary condition loss functions"
+# sum([prob.loss_functions.bc_loss_functions[i](θ) for i in eachindex(1:4)])
+# sum([prob.loss_functions.pde_loss_functions[i](θ) for i in eachindex(1)])
+
+# prob.loss_functions.full_loss_function(θ, 32)
+
+# prob.loss_functions.bc_loss_functions[1](θ)
+
+# prob.loss_functions.bc_loss_functions
+# prob.loss_functions.full_loss_function
+# prob.loss_functions.additional_loss_function
+# prob.loss_functions.pde_loss_functions
+
+# 1.3953060473003345 + 1.378102161087438 + 1.395376727128639 + 1.3783868705075002 +
+# 0.22674532775196876
+# # "The PDE loss functions"
+# prob.loss_functions.pde_loss_functions
+# prob.loss_functions.pde_loss_functions[1](θ)
+# # "The full loss function, combining the PDE and boundary condition loss functions.This is the loss function that is used by the optimizer."
+# prob.loss_functions.full_loss_function(θ, nothing)
+# prob.loss_functions.full_loss_function(θ, 423423)
+
+# # "The wrapped `additional_loss`, as pieced together for the optimizer."
+# prob.loss_functions.additional_loss_function
+# # "The pre-data version of the PDE loss function"
+# prob.loss_functions.datafree_pde_loss_functions
+# # "The pre-data version of the BC loss function"
+# prob.loss_functions.datafree_bc_loss_functions
+
+# using Random
+# θ, st = Lux.setup(Random.default_rng(), chain)
+# #Optimizer
+# opt = OptimizationOptimJL.BFGS()
+
+# #Callback function
+# callback = function (p, l)
+#     println("Current loss is: $l")
+#     return false
+# end
+
+# res = Optimization.solve(prob, opt, callback = callback, maxiters = 1000)
+# phi = discretization.phi
+
+# # ------------------------------------------------
+# using NeuralPDE, Lux, ModelingToolkit, Optimization, OptimizationOptimJL, OrdinaryDiffEq,
+#       Plots
+# import ModelingToolkit: Interval, infimum, supremum
+# @parameters t, σ_, β, ρ
+# @variables x(..), y(..), z(..)
+# Dt = Differential(t)
+# eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
+#     Dt(y(t)) ~ x(t) * (ρ - z(t)) - y(t),
+#     Dt(z(t)) ~ x(t) * y(t) - β * z(t)]
+
+# bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
+# domains = [t ∈ Interval(0.0, 1.0)]
+# dt = 0.01
+
+# input_ = length(domains)
+# n = 8
+# chain1 = Lux.Chain(Lux.Dense(input_, n, Lux.σ), Lux.Dense(n, n, Lux.σ),
+#                    Lux.Dense(n, n, Lux.σ),
+#                    Lux.Dense(n, 1))
+# chain2 = Lux.Chain(Lux.Dense(input_, n, Lux.σ), Lux.Dense(n, n, Lux.σ),
+#                    Lux.Dense(n, n, Lux.σ),
+#                    Lux.Dense(n, 1))
+# chain3 = Lux.Chain(Lux.Dense(input_, n, Lux.σ), Lux.Dense(n, n, Lux.σ),
+#                    Lux.Dense(n, n, Lux.σ),
+#                    Lux.Dense(n, 1))
+
+# function lorenz!(du, u, p, t)
+#     du[1] = 10.0 * (u[2] - u[1])
+#     du[2] = u[1] * (28.0 - u[3]) - u[2]
+#     du[3] = u[1] * u[2] - (8 / 3) * u[3]
+# end
+
+# u0 = [1.0; 0.0; 0.0]
+# tspan = (0.0, 1.0)
+# prob = ODEProblem(lorenz!, u0, tspan)
+# sol = solve(prob, Tsit5(), dt = 0.1)
+# ts = [infimum(d.domain):dt:supremum(d.domain) for d in domains][1]
+# function getData(sol)
+#     data = []
+#     us = hcat(sol(ts).u...)
+#     ts_ = hcat(sol(ts).t...)
+#     return [us, ts_]
+# end
+# data = getData(sol)
+
+# (u_, t_) = data
+# len = length(data[2])
+
+# depvars = [:x, :y, :z]
+# function additional_loss(phi, θ, p)
+#     return sum(sum(abs2, phi[i](t_, θ[depvars[i]]) .- u_[[i], :]) / len for i in 1:1:3)
+# end
+
+# discretization = NeuralPDE.PhysicsInformedNN([chain1, chain2, chain3],
+#                                              NeuralPDE.GridTraining(dt),
+#                                              param_estim = false,
+#                                              additional_loss = additional_loss)
+# @named pde_system = PDESystem(eqs, bcs, domains, [t], [x(t), y(t), z(t)], [σ_, ρ, β],
+#                               defaults = Dict([p .=> 1.0 for p in [σ_, ρ, β]]))
+# prob = NeuralPDE.discretize(pde_system, discretization)
+# callback = function (p, l)
+#     println("Current loss is: $l")
+#     return false
+# end
+# res = Optimization.solve(prob, BFGS(); callback = callback, maxiters = 5000)
+# p_ = res.u[(end - 2):end] # p_ = [9.93, 28.002, 2.667]
+
+# minimizers = [res.u.depvar[depvars[i]] for i in 1:3]
+# ts = [infimum(d.domain):(dt / 10):supremum(d.domain) for d in domains][1]
+# u_predict = [[discretization.phi[i]([t], minimizers[i])[1] for t in ts] for i in 1:3]
+# plot(sol)
+# plot!(ts, u_predict, label = ["x(t)" "y(t)" "z(t)"])
+
+# discretization.multioutput
+# discretization.chain
+# discretization.strategy
+# discretization.init_params
+# discretization.phi
+# discretization.derivative
+# discretization.param_estim
+# discretization.additional_loss
+# discretization.adaptive_loss
+# discretization.logger
+# discretization.log_options
+# discretization.iteration
+# discretization.self_increment
+# discretization.multioutput
+# discretization.kwargs
+
+# struct BNNODE1{P <: Vector{<:Distribution}}
+#     chain::Any
+#     Kernel::Any
+#     draw_samples::UInt32
+#     priorsNNw::Tuple{Float64, Float64}
+#     param::P
+#     l2std::Vector{Float64}
+#     phystd::Vector{Float64}
+
+#     function BNNODE1(chain, Kernel; draw_samples = 2000, priorsNNw = (0.0, 3.0), param = [],
+#                      l2std = [0.05], phystd = [0.05])
+#         BNNODE1(chain, Kernel, draw_samples, priorsNNw, param, l2std, phystd)
+#     end
+# end
+
+# struct BNNODE3{C, K, P <: Union{Any, Vector{<:Distribution}}}
+#     chain::C
+#     Kernel::K
+#     draw_samples::UInt32
+#     priorsNNw::Tuple{Float64, Float64}
+#     param::P
+#     l2std::Vector{Float64}
+#     phystd::Vector{Float64}
+
+#     function BNNODE3(chain, Kernel; draw_samples = 2000, priorsNNw = (0.0, 3.0), param = [],
+#                      l2std = [0.05], phystd = [0.05])
+#         new{typeof(chain), typeof(Kernel), typeof(param)}(chain, Kernel, draw_samples,
+#                                                           priorsNNw, param, l2std, phystd)
+#     end
+# end
+# linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π)
+# linear = (u, p, t) -> cos(2 * π * t)
+# tspan = (0.0, 2.0)
+# u0 = 0.0
+# prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan)
+
+# ta = range(tspan[1], tspan[2], length = 300)
+# u = [linear_analytic(u0, nothing, ti) for ti in ta]
+# sol1 = solve(prob, Tsit5())
+
+# # BPINN AND TRAINING DATASET CREATION, NN create, Reconstruct
+# x̂ = collect(Float64, Array(u) + 0.02 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂[1:100], time[1:100]]
+
+# # Call BPINN, create chain
+# chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1))
+# chainlux = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1))
+# HMC
+# solve(prob, BNNODE(chainflux, HMC))
+# BNNODE1(chainflux, HMC, 2000)
+
+# draw_samples = 2000
+# priorsNNw = (0.0, 3.0)
+# param = []
+# l2std = [0.05]
+# phystd = [0.05]
+# @time BNNODE3(chainflux, HMC, draw_samples = 2000, priorsNNw = (0.0, 3.0),
+#               param = [nothing],
+#               l2std = [0.05], phystd = [0.05])
+# typeof(Nothing) <: Vector{<:Distribution}
+# Nothing <: Distribution
+# {UnionAll} <: Distribution
+# @time [Nothing]
+# typeof([Nothing])
+# @time [1]
+
+# function test1(sum; c = 23, d = 32)
+#     return sum + c + d
+# end
+# function test(a, b; c, d)
+#     return test1(a + b, c, d)
+# end
+
+# test(2, 2)
+
+# struct BNNODE3{C, K, P <: Union{Vector{Nothing}, Vector{<:Distribution}}}
+#     chain::C
+#     Kernel::K
+#     draw_samples::Int64
+#     priorsNNw::Tuple{Float64, Float64}
+#     param::P
+#     l2std::Vector{Float64}
+#     phystd::Vector{Float64}
+
+#     function BNNODE3(chain, Kernel; draw_samples,
+#                      priorsNNw, param = [nothing], l2std, phystd)
+#         new{typeof(chain), typeof(Kernel), typeof(param)}(chain,
+#                                                           Kernel,
+#                                                           draw_samples,
+#                                                           priorsNNw,
+#                                                           param, l2std,
+#                                                           phystd)
+#     end
+# end
+
+# function solve1(prob::DiffEqBase.AbstractODEProblem, alg::BNNODE3;
+#                 dataset = [nothing], dt = 1 / 20.0,
+#                 init_params = nothing, nchains = 1,
+#                 autodiff = false, Integrator = Leapfrog,
+#                 Adaptor = StanHMCAdaptor, targetacceptancerate = 0.8,
+#                 Metric = DiagEuclideanMetric, jitter_rate = 3.0,
+#                 tempering_rate = 3.0, max_depth = 10, Δ_max = 1000,
+#                 n_leapfrog = 10, δ = 0.65, λ = 0.3, progress = true,
+#                 verbose = false)
+#     chain = alg.chain
+#     l2std = alg.l2std
+#     phystd = alg.phystd
+#     priorsNNw = alg.priorsNNw
+#     Kernel = alg.Kernel
+#     draw_samples = alg.draw_samples
+
+#     param = alg.param == [nothing] ? [] : alg.param
+#     mcmcchain, samples, statistics = ahmc_bayesian_pinn_ode(prob, chain, dataset = dataset,
+#                                                             draw_samples = draw_samples,
+#                                                             init_params = init_params,
+#                                                             physdt = dt, l2std = l2std,
+#                                                             phystd = phystd,
+#                                                             priorsNNw = priorsNNw,
+#                                                             param = param,
+#                                                             nchains = nchains,
+#                                                             autodiff = autodiff,
+#                                                             Kernel = Kernel,
+#                                                             Integrator = Integrator,
+#                                                             Adaptor = Adaptor,
+#                                                             targetacceptancerate = targetacceptancerate,
+#                                                             Metric = Metric,
+#                                                             jitter_rate = jitter_rate,
+#                                                             tempering_rate = tempering_rate,
+#                                                             max_depth = max_depth,
+#                                                             Δ_max = Δ_max,
+#                                                             n_leapfrog = n_leapfrog, δ = δ,
+#                                                             λ = λ, progress = progress,
+#                                                             verbose = verbose)
+# end
+
+# linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π)
+# linear = (u, p, t) -> cos(2 * π * t)
+# tspan = (0.0, 2.0)
+# u0 = 0.0
+# prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan)
+
+# ta = range(tspan[1], tspan[2], length = 300)
+# u = [linear_analytic(u0, nothing, ti) for ti in ta]
+# # sol1 = solve(prob, Tsit5())
+
+# # BPINN AND TRAINING DATASET CREATION, NN create, Reconstruct
+# x̂ = collect(Float64, Array(u) + 0.02 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂[1:100], time[1:100]]
+
+# # Call BPINN, create chain
+# chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1))
+# chainlux = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1))
+# HMC
+
+# solve1(prob, a)
+# a = BNNODE3(chainflux, HMC, draw_samples = 2000,
+#             priorsNNw = (0.0, 3.0),
+#             l2std = [0.05], phystd = [0.05])
+
+# Define Lotka-Volterra model.
+function lotka_volterra1(u, p, t)
+    # Model parameters.
+    α, β, γ, δ = p
+    # Current state.
+    x, y = u
+
+    # Evaluate differential equations.
+    dx = (α - β * y) * x # prey
+    dy = (δ * x - γ) * y # predator
+
+    return [dx, dy]
+end
+
+u0 = [1.0, 1.0]
+p = [1.5, 1.0, 3.0, 1.0]
+tspan = (0.0, 6.0)
+prob = ODEProblem(lotka_volterra1, u0, tspan, p)
+solution = solve(prob, Tsit5(); saveat = 0.05)
+
+as = reduce(hcat, solution.u)
+as[1, :]
+# Plot simulation.
+time = solution.t
+u = hcat(solution.u...)
+# BPINN AND TRAINING DATASET CREATION, NN create, Reconstruct
+x = u[1, :] + 0.5 * randn(length(u[1, :]))
+y = u[2, :] + 0.5 * randn(length(u[1, :]))
+dataset = [x[1:50], y[1:50], time[1:50]]
+# scatter!(time, [x, y])
+# scatter!(dataset[3], [dataset[2], dataset[1]])
+
+# NN has 2 outputs as u -> [dx,dy]
+chainlux1 = Lux.Chain(Lux.Dense(1, 6, Lux.tanh), Lux.Dense(6, 6, Lux.tanh),
+    Lux.Dense(6, 2))
+chainflux1 = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 6, tanh), Flux.Dense(6, 2))
+
+fh_mcmc_chainflux1, fhsamplesflux1, fhstatsflux1 = ahmc_bayesian_pinn_ode(prob, chainflux1,
+    dataset = dataset,
+    draw_samples = 1000,
+    l2std = [
+        0.05,
+        0.05,
+    ],
+    phystd = [
+        0.05,
+        0.05,
+    ],
+    priorsNNw = (0.0, 3.0), progress = true)
+ahmc_bayesian_pinn_ode(prob, chainflux1,
+    dataset = dataset,
+    draw_samples = 1000,
+    l2std = [
+        0.05,
+        0.05,
+    ],
+    phystd = [
+        0.05,
+        0.05,
+    ],
+    priorsNNw = (0.0, 3.0), progress = true)
+
+#     2×171 Matrix{Float64}:
+#  -0.5  -0.518956  -0.529639  …  -1.00266  -1.01049
+#   2.0   1.97109    1.92747       0.42619   0.396335
+
+#     2-element Vector{Float64}:
+#  -119451.94949911036
+#  -128543.23714618056
+
+# alg = NeuralPDE.BNNODE(chainflux1,
+#     dataset = dataset,
+#     draw_samples = 1000,
+#     l2std = [
+#         0.05,
+#         0.05,
+#     ],
+#     phystd = [
+#         0.05,
+#         0.05,
+#     ],
+#     priorsNNw = (0.0,
+#         3.0),
+#     param = [
+#         Normal(4.5,
+#             5),
+#         Normal(7,
+#             2),
+#         Normal(5,
+#             2),
+#         Normal(-4,
+#             6),
+#     ],
+#     n_leapfrog = 30, progress = true)
+
+# sol3flux_pestim = solve(prob, alg)
+
+#  ----------------------------------------------
+# original paper implementation
+# 25 points 
+run1  #7.70593 Particles{Float64, 1}
+run2 #6.66347 Particles{Float64, 1} 
+run3 #6.84827 Particles{Float64, 1} 
+
+# 50 points 
+run1 #7.83577 Particles{Float64, 1}
+run2 #6.49477 Particles{Float64, 1}
+run3 #6.47421 Particles{Float64, 1}
+
+# 100 points 
+run1 #5.96604 Particles{Float64, 1}
+run2 #6.05432 Particles{Float64, 1}
+run3 #6.08856 Particles{Float64, 1}
+
+# Full likelihood(uses total variation regularized differentiation) 
+# 25 points 
+run1 #6.41722 Particles{Float64, 1}
+run2 #6.42782 Particles{Float64, 1}
+run3 #6.42782 Particles{Float64, 1}
+
+# 50 points
+run1 #5.71268 Particles{Float64, 1}
+run2 #5.74599 Particles{Float64, 1}
+run3 #5.74599 Particles{Float64, 1}
+
+# 100 points  
+run1 #6.59097 Particles{Float64, 1}
+run2 #6.62813 Particles{Float64, 1}
+run3 #6.62813 Particles{Float64, 1}
+
+using Plots, StatsPlots
+function lotka_volterra(u, p, t)
+    # Model parameters.
+    α, β, γ, δ = p
+    # Current state.
+    x, y = u
+
+    # Evaluate differential equations.
+    dx = (α - β * y) * x # prey
+    dy = (δ * x - γ) * y # predator
+
+    return [dx, dy]
+end
+
+# initial-value problem.
+u0 = [1.0, 1.0]
+p = [1.5, 1.0, 3.0, 1.0]
+tspan = (0.0, 6.0)
+prob = ODEProblem(lotka_volterra, u0, tspan, p)
+
+# Plot simulation.
+
+solution = solve(prob, Tsit5(); saveat = 0.05)
+plot(solve(prob, Tsit5()))
+
+# Dataset creation for parameter estimation
+time = solution.t
+u = hcat(solution.u...)
+x = u[1, :] + 0.5 * randn(length(u[1, :]))
+y = u[2, :] + 0.5 * randn(length(u[1, :]))
+dataset = [x, y, time]
+
+# Neural Networks must have 2 outputs as u -> [dx,dy] in function lotka_volterra()
+chainflux = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 6, tanh), Flux.Dense(6, 2)) |>
+            Flux.f64
+
+chainlux = Lux.Chain(Lux.Dense(1, 6, Lux.tanh), Lux.Dense(6, 6, Lux.tanh), Lux.Dense(6, 2))
+
+alg1 = NeuralPDE.BNNODE(chainflux,
+    dataset = dataset,
+    draw_samples = 1000,
+    l2std = [
+        0.01,
+        0.01,
+    ],
+    phystd = [
+        0.01,
+        0.01,
+    ],
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol_flux_pestim = solve(prob, alg1)
+
+# Dataset not needed as we are solving the equation with ideal parameters
+alg2 = NeuralPDE.BNNODE(chainlux,
+    draw_samples = 1000,
+    l2std = [
+        0.05,
+        0.05,
+    ],
+    phystd = [
+        0.05,
+        0.05,
+    ],
+    priorsNNw = (0.0,
+        3.0),
+    n_leapfrog = 30, progress = true)
+
+sol_lux = solve(prob, alg2)
+
+#testing timepoints must match keyword arg `saveat`` timepoints of solve() call
+t = collect(Float64, prob.tspan[1]:(1 / 50.0):prob.tspan[2])
+
+# plotting solution for x,y for chain_flux
+plot(t, sol_flux_pestim.ensemblesol[1])
+plot!(t, sol_flux_pestim.ensemblesol[2])
+
+plot(sol_flux_pestim.ens1mblesol[1])
+plot!(sol_flux_pestim.ensemblesol[2])
+
+# estimated ODE parameters by .estimated_ode_params, weights and biases by .estimated_nn_params
+sol_flux_pestim.estimated_nn_params
+sol_flux_pestim.estimated_ode_params
+
+# plotting solution for x,y for chain_lux
+plot(t, sol_lux.ensemblesol[1])
+plot!(t, sol_lux.ensemblesol[2])
+
+# estimated weights and biases by .estimated_nn_params for chain_lux
+sol_lux.estimated_nn_params
+
+# # ----------------------------------stats-----------------------------
+# #   ----------------------------
+# # -----------------------------
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -3118.0639515039957
+# Sampling 100%|███████████████████████████████| Time: 0:04:47 
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -3118.0639515039957
+# Sampling 100%|███████████████████████████████| Time: 0:03:38 
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -3118.0639515039957
+# Sampling 100%|███████████████████████████████| Time: 0:04:12 
+# #   --------------------------
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -6242.351071278482
+# Sampling 100%|███████████████████████████████| Time: 0:05:09 
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -6242.351071278482
+# Sampling 100%|███████████████████████████████| Time: 0:04:47 
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -6242.351071278482
+# Sampling 100%|███████████████████████████████| Time: 0:04:25 
+# #   --------------
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -12497.32657780532
+# Sampling 100%|███████████████████████████████| Time: 0:06:47 
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -12497.32657780532
+# Sampling 100%|███████████████████████████████| Time: 0:05:54
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -12497.32657780532
+# Sampling 100%|███████████████████████████████| Time: 0:05:46
+# # ------------------------
+# # -----------------------
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -882.2934218498742
+# L2loss2 Logpdf is : -3118.0639515039957
+# Sampling 100%|███████████████████████████████| Time: 0:04:06
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -882.2934218498742
+# L2loss2 Logpdf is : -3118.0639515039957
+# Sampling 100%|███████████████████████████████| Time: 0:03:32
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -882.2934218498742
+# L2loss2 Logpdf is : -3118.0639515039957
+# Sampling 100%|███████████████████████████████| Time: 0:03:01 
+# # --------------------------
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1411.1717435511828
+# L2loss2 Logpdf is : -6242.351071278482
+# Sampling 100%|███████████████████████████████| Time: 0:04:02
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1411.1717435511828
+# L2loss2 Logpdf is : -6242.351071278482
+# Sampling 100%|███████████████████████████████| Time: 0:04:08
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1411.1717435511828
+# L2loss2 Logpdf is : -6242.351071278482
+# Sampling 100%|███████████████████████████████| Time: 0:04:15
+# # ----------------------------
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -3240.067149411982
+# L2loss2 Logpdf is : -12497.32657780532
+# Sampling 100%|███████████████████████████████| Time: 0:05:37
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -3240.067149411982
+# L2loss2 Logpdf is : -12497.32657780532
+# Sampling 100%|███████████████████████████████| Time: 0:06:02
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -3240.067149411982
+# L2loss2 Logpdf is : -12497.32657780532
+# Sampling 100%|███████████████████████████████| Time: 0:06:13
+
+using NeuralPDE, Lux, ModelingToolkit, Optimization, OptimizationOptimJL
+import ModelingToolkit: Interval, infimum, supremum
+
+using NeuralPDE, Flux, OptimizationOptimisers
+
+function diffeq(u, p, t)
+    u1, u2 = u
+    return [u2, p[1] + p[2] * sin(u1) + p[3] * u2]
+end
+p = [5, -10, -1.7]
+u0 = [-1.0, 7.0]
+tspan = (0.0, 10.0)
+prob = ODEProblem(ODEFunction(diffeq), u0, tspan, p)
+
+chainnew = Flux.Chain(Flux.Dense(1, 8, tanh), Flux.Dense(8, 8, tanh), Flux.Dense(8, 2)) |>
+           Flux.f64
+
+opt = OptimizationOptimisers.Adam(0.1)
+opt = Optimisers.ADAGrad(0.1)
+opt = Optimisers.AdaMax(0.01)
+algnew = NeuralPDE.NNODE(chainnew, opt)
+solution_new = solve(prob, algnew, verbose = true,
+    abstol = 1e-10, maxiters = 7000)
+u = reduce(hcat, solution_new.u)
+plot(solution_new.t, u[1, :])
+plot!(solution_new.t, u[2, :])
+
+algnew = NeuralPDE.BNNODE(chainnew, draw_samples = 200,
+    n_leapfrog = 30, progress = true)
+solution_new = solve(prob, algnew)
+
+@parameters t
+@variables u1(..), u2(..)
+D = Differential(t)
+eq = [D(u1(t)) ~ u2(t),
+    D(u2(t)) ~ 5 - 10 * sin(u1(t)) - 1.7 * u2(t)];
+
+import ModelingToolkit: Interval
+bcs = [u1(0) ~ -1, u2(0) ~ 7]
+domains = [t ∈ Interval(0.0, 10.0)]
+dt = 0.01
+
+input_ = length(domains) # number of dimensions
+n = 16
+chain = [Lux.Chain(Lux.Dense(input_, n, Lux.σ), Lux.Dense(n, n, Lux.σ), Lux.Dense(n, 1))
+         for _ in 1:2]
+
+@named pde_system = PDESystem(eq, bcs, domains, [t], [u1(t), u2(t)])
+
+strategy = NeuralPDE.GridTraining(dt)
+discretization = PhysicsInformedNN(chain, strategy)
+sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization)
+
+pde_loss_functions = sym_prob.loss_functions.pde_loss_functions
+bc_loss_functions = sym_prob.loss_functions.bc_loss_functions
+
+callback = function (p, l)
+    println("loss: ", l)
+    # println("pde_losses: ", map(l_ -> l_(p), pde_loss_functions))
+    # println("bcs_losses: ", map(l_ -> l_(p), bc_loss_functions))
+    return false
+end
+
+loss_functions = [pde_loss_functions; bc_loss_functions]
+
+function loss_function(θ, p)
+    sum(map(l -> l(θ), loss_functions))
+end
+
+f_ = OptimizationFunction(loss_function, Optimization.AutoZygote())
+prob = Optimization.OptimizationProblem(f_, sym_prob.flat_init_params)
+
+res = Optimization.solve(prob,
+    OptimizationOptimJL.BFGS();
+    callback = callback,
+    maxiters = 1000)
+phi = discretization.phi
\ No newline at end of file

From a999ccffab953c47f2bfcc42e07a3bb71ee2dc36 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Sat, 20 Jan 2024 23:53:20 +0530
Subject: [PATCH 006/107] removed new files

---
 src/BNNODE_new.jl     |  794 --------
 test/BPINN_newform.jl | 4354 -----------------------------------------
 2 files changed, 5148 deletions(-)
 delete mode 100644 src/BNNODE_new.jl
 delete mode 100644 test/BPINN_newform.jl

diff --git a/src/BNNODE_new.jl b/src/BNNODE_new.jl
deleted file mode 100644
index e6b1f24faa..0000000000
--- a/src/BNNODE_new.jl
+++ /dev/null
@@ -1,794 +0,0 @@
-mutable struct LogTargetDensity{C, S, ST <: AbstractTrainingStrategy, I,
-    P <: Vector{<:Distribution},
-    D <:
-    Union{Vector{Nothing}, Vector{<:Vector{<:AbstractFloat}}},
-}
-    dim::Int
-    prob::DiffEqBase.ODEProblem
-    chain::C
-    st::S
-    strategy::ST
-    dataset::D
-    priors::P
-    phystd::Vector{Float64}
-    l2std::Vector{Float64}
-    autodiff::Bool
-    physdt::Float64
-    extraparams::Int
-    init_params::I
-
-    function LogTargetDensity(dim, prob, chain::Optimisers.Restructure, st, strategy,
-        dataset,
-        priors, phystd, l2std, autodiff, physdt, extraparams,
-        init_params::AbstractVector)
-        new{
-            typeof(chain),
-            Nothing,
-            typeof(strategy),
-            typeof(init_params),
-            typeof(priors),
-            typeof(dataset),
-        }(dim,
-            prob,
-            chain,
-            nothing, strategy,
-            dataset,
-            priors,
-            phystd,
-            l2std,
-            autodiff,
-            physdt,
-            extraparams,
-            init_params)
-    end
-    function LogTargetDensity(dim, prob, chain::Lux.AbstractExplicitLayer, st, strategy,
-        dataset,
-        priors, phystd, l2std, autodiff, physdt, extraparams,
-        init_params::NamedTuple)
-        new{
-            typeof(chain),
-            typeof(st),
-            typeof(strategy),
-            typeof(init_params),
-            typeof(priors),
-            typeof(dataset),
-        }(dim,
-            prob,
-            chain, st, strategy,
-            dataset, priors,
-            phystd, l2std,
-            autodiff,
-            physdt,
-            extraparams,
-            init_params)
-    end
-end
-
-"""
-cool function to convert parameter's vector to ComponentArray of parameters (for Lux Chain: vector of samples -> Lux ComponentArrays)
-"""
-function vector_to_parameters(ps_new::AbstractVector, ps::NamedTuple)
-    @assert length(ps_new) == Lux.parameterlength(ps)
-    i = 1
-    function get_ps(x)
-        z = reshape(view(ps_new, i:(i + length(x) - 1)), size(x))
-        i += length(x)
-        return z
-    end
-    return Functors.fmap(get_ps, ps)
-end
-
-function LogDensityProblems.logdensity(Tar::LogTargetDensity, θ)
-    return physloglikelihood(Tar, θ) + priorweights(Tar, θ) + L2LossData(Tar, θ)
-    #  +  L2loss2(Tar, θ)
-end
-
-LogDensityProblems.dimension(Tar::LogTargetDensity) = Tar.dim
-
-function LogDensityProblems.capabilities(::LogTargetDensity)
-    LogDensityProblems.LogDensityOrder{1}()
-end
-
-# suggested extra loss function
-function L2loss2(Tar::LogTargetDensity, θ)
-    f = Tar.prob.f
-
-    # parameter estimation chosen or not
-    if Tar.extraparams > 0
-        dataset, deri_sol = Tar.dataset
-        # deri_sol = deri_sol'
-        autodiff = Tar.autodiff
-
-        # # Timepoints to enforce Physics
-        # dataset = Array(reduce(hcat, dataset)')
-        # t = dataset[end, :]
-        # û = dataset[1:(end - 1), :]
-
-        # ode_params = Tar.extraparams == 1 ?
-        #              θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
-        #              θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
-
-        # if length(û[:, 1]) == 1
-        #     physsol = [f(û[:, i][1],
-        #         ode_params,
-        #         t[i])
-        #                for i in 1:length(û[1, :])]
-        # else
-        #     physsol = [f(û[:, i],
-        #         ode_params,
-        #         t[i])
-        #                for i in 1:length(û[1, :])]
-        # end
-        # #form of NN output matrix output dim x n
-        # deri_physsol = reduce(hcat, physsol)
-
-        # > for perfect deriv(basically gradient matching in case of an ODEFunction)
-        # in case of PDE or general ODE we would want to reduce residue of f(du,u,p,t)
-        # if length(û[:, 1]) == 1
-        #     deri_sol = [f(û[:, i][1],
-        #         Tar.prob.p,
-        #         t[i])
-        #                 for i in 1:length(û[1, :])]
-        # else
-        #     deri_sol = [f(û[:, i],
-        #         Tar.prob.p,
-        #         t[i])
-        #                 for i in 1:length(û[1, :])]
-        # end
-        # deri_sol = reduce(hcat, deri_sol) 
-        # deri_sol = reduce(hcat, derivatives)
-
-        # Timepoints to enforce Physics 
-        t = dataset[end]
-        u1 = dataset[2]
-        û = dataset[1]
-        # Tar(t, θ[1:(length(θ) - Tar.extraparams)])'
-        #  
-
-        nnsol = NNodederi(Tar, t, θ[1:(length(θ) - Tar.extraparams)], autodiff)
-
-        ode_params = Tar.extraparams == 1 ?
-                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
-                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
-
-        if length(Tar.prob.u0) == 1
-            physsol = [f(û[i],
-                ode_params,
-                t[i])
-                       for i in 1:length(û[:, 1])]
-        else
-            physsol = [f([û[i], u1[i]],
-                ode_params,
-                t[i])
-                       for i in 1:length(û[:, 1])]
-        end
-        #form of NN output matrix output dim x n 
-        deri_physsol = reduce(hcat, physsol)
-
-        # if length(Tar.prob.u0) == 1
-        #     nnsol = [f(û[i],
-        #         Tar.prob.p,
-        #         t[i])
-        #              for i in 1:length(û[:, 1])]
-        # else
-        #     nnsol = [f([û[i], u1[i]],
-        #         Tar.prob.p,
-        #         t[i])
-        #              for i in 1:length(û[:, 1])]
-        # end
-        # form of NN output matrix output dim x n
-        # nnsol = reduce(hcat, nnsol)
-
-        # > Instead of dataset gradients trying NN derivatives with dataset collocation 
-        # # convert to matrix as nnsol  
-
-        physlogprob = 0
-        for i in 1:length(Tar.prob.u0)
-            # can add phystd[i] for u[i] 
-            physlogprob += logpdf(MvNormal(deri_physsol[i, :],
-                    LinearAlgebra.Diagonal(map(abs2,
-                        (Tar.l2std[i] * 4.0) .*
-                        ones(length(nnsol[i, :]))))),
-                nnsol[i, :])
-        end
-        return physlogprob
-    else
-        return 0
-    end
-end
-
-# PDE(DU,U,P,T)=0
-
-# Derivated via Central Diff
-# function calculate_derivatives2(dataset)
-#     x̂, time = dataset
-#     num_points = length(x̂)
-#     # Initialize an array to store the derivative values.
-#     derivatives = similar(x̂)
-
-#     for i in 2:(num_points - 1)
-#         # Calculate the first-order derivative using central differences.
-#         Δt_forward = time[i + 1] - time[i]
-#         Δt_backward = time[i] - time[i - 1]
-
-#         derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
-
-#         derivatives[i] = derivative
-#     end
-
-#     # Derivatives at the endpoints can be calculated using forward or backward differences.
-#     derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
-#     derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
-#     return derivatives
-# end
-
-function calderivatives(prob, dataset)
-    chainflux = Flux.Chain(Flux.Dense(1, 8, tanh), Flux.Dense(8, 8, tanh),
-        Flux.Dense(8, 2)) |> Flux.f64
-    # chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64
-    function loss(x, y)
-        # sum(Flux.mse.(prob.u0[1] .+ (prob.tspan[2] .- x)' .* chainflux(x)[1, :], y[1]) +
-        #     Flux.mse.(prob.u0[2] .+ (prob.tspan[2] .- x)' .* chainflux(x)[2, :], y[2]))
-        # sum(Flux.mse.(prob.u0[1] .+ (prob.tspan[2] .- x)' .* chainflux(x)[1, :], y[1]))
-        sum(Flux.mse.(chainflux(x), y))
-    end
-    optimizer = Flux.Optimise.ADAM(0.01)
-    epochs = 3000
-    for epoch in 1:epochs
-        Flux.train!(loss,
-            Flux.params(chainflux),
-            [(dataset[end]', dataset[1:(end - 1)])],
-            optimizer)
-    end
-
-    # A1 = (prob.u0' .+
-    #   (prob.tspan[2] .- (dataset[end]' .+ sqrt(eps(eltype(Float64)))))' .*
-    #   chainflux(dataset[end]' .+ sqrt(eps(eltype(Float64))))')
-
-    # A2 = (prob.u0' .+
-    #       (prob.tspan[2] .- (dataset[end]'))' .*
-    #       chainflux(dataset[end]')')
-
-    A1 = chainflux(dataset[end]' .+ sqrt(eps(eltype(dataset[end][1]))))
-    A2 = chainflux(dataset[end]')
-
-    gradients = (A2 .- A1) ./ sqrt(eps(eltype(dataset[end][1])))
-
-    return gradients
-end
-
-function calculate_derivatives(dataset)
-
-    # u = dataset[1]
-    # u1 = dataset[2]
-    # t = dataset[end]
-    # # control points
-    # n = Int(floor(length(t) / 10))
-    # # spline for datasetvalues(solution) 
-    # # interp = BSplineApprox(u, t, 4, 10, :Uniform, :Uniform)
-    # interp = CubicSpline(u, t)
-    # interp1 = CubicSpline(u1, t)
-    # # derrivatives interpolation
-    # dx = t[2] - t[1]
-    # time = collect(t[1]:dx:t[end])
-    # smoothu = [interp(i) for i in time]
-    # smoothu1 = [interp1(i) for i in time]
-    # # derivative of the spline (must match function derivative) 
-    # û = tvdiff(smoothu, 20, 0.5, dx = dx, ε = 1)
-    # û1 = tvdiff(smoothu1, 20, 0.5, dx = dx, ε = 1)
-    # # tvdiff(smoothu, 100, 0.035, dx = dx, ε = 1)
-    # # FDM
-    # # û1 = diff(u) / dx
-    # # dataset[1] and smoothu are almost equal(rounding errors)
-    # return [û, û1] 
-
-end
-
-"""
-L2 loss loglikelihood(needed for ODE parameter estimation)
-"""
-function L2LossData(Tar::LogTargetDensity, θ)
-    dataset = Tar.dataset
-    # check if dataset is provided
-    if dataset isa Vector{Nothing} || Tar.extraparams == 0
-        return 0
-    else
-        # matrix(each row corresponds to vector u's rows)
-        nn = Tar(dataset[end], θ[1:(length(θ) - Tar.extraparams)])
-
-        L2logprob = 0
-        for i in 1:length(Tar.prob.u0)
-            # for u[i] ith vector must be added to dataset,nn[1,:] is the dx in lotka_volterra
-            L2logprob += logpdf(MvNormal(nn[i, :],
-                    LinearAlgebra.Diagonal(map(abs2,
-                        (Tar.l2std[i] * 0.5) .*
-                        ones(length(dataset[i]))))),
-                dataset[i])
-        end
-        return L2logprob
-    end
-end
-
-"""
-physics loglikelihood over problem timespan + dataset timepoints
-"""
-function physloglikelihood(Tar::LogTargetDensity, θ)
-    f = Tar.prob.f
-    p = Tar.prob.p
-    tspan = Tar.prob.tspan
-    autodiff = Tar.autodiff
-    strategy = Tar.strategy
-
-    # parameter estimation chosen or not
-    if Tar.extraparams > 0
-        ode_params = Tar.extraparams == 1 ?
-                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
-                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
-    else
-        ode_params = p == SciMLBase.NullParameters() ? [] : p
-    end
-
-    return getlogpdf(strategy, Tar, f, autodiff, tspan, ode_params, θ)
-end
-
-function getlogpdf(strategy::GridTraining, Tar::LogTargetDensity, f, autodiff::Bool,
-    tspan,
-    ode_params, θ)
-    if Tar.dataset isa Vector{Nothing}
-        t = collect(eltype(strategy.dx), tspan[1]:(strategy.dx):tspan[2])
-    else
-        t = vcat(collect(eltype(strategy.dx), tspan[1]:(strategy.dx):tspan[2]),
-            Tar.dataset[end])
-    end
-
-    sum(innerdiff(Tar, f, autodiff, t, θ,
-        ode_params))
-end
-
-function getlogpdf(strategy::StochasticTraining,
-    Tar::LogTargetDensity,
-    f,
-    autodiff::Bool,
-    tspan,
-    ode_params,
-    θ)
-    if Tar.dataset isa Vector{Nothing}
-        t = [(tspan[2] - tspan[1]) * rand() + tspan[1] for i in 1:(strategy.points)]
-    else
-        t = vcat([(tspan[2] - tspan[1]) * rand() + tspan[1] for i in 1:(strategy.points)],
-            Tar.dataset[end])
-    end
-
-    sum(innerdiff(Tar, f, autodiff, t, θ,
-        ode_params))
-end
-
-function getlogpdf(strategy::QuadratureTraining, Tar::LogTargetDensity, f,
-    autodiff::Bool,
-    tspan,
-    ode_params, θ)
-    function integrand(t::Number, θ)
-        innerdiff(Tar, f, autodiff, [t], θ, ode_params)
-    end
-    intprob = IntegralProblem(integrand, tspan[1], tspan[2], θ; nout = length(Tar.prob.u0))
-    # add dataset logpdf?
-    sol = solve(intprob, QuadGKJL(); abstol = strategy.abstol, reltol = strategy.reltol)
-    sum(sol.u)
-end
-
-function getlogpdf(strategy::WeightedIntervalTraining, Tar::LogTargetDensity, f,
-    autodiff::Bool,
-    tspan,
-    ode_params, θ)
-    minT = tspan[1]
-    maxT = tspan[2]
-
-    weights = strategy.weights ./ sum(strategy.weights)
-
-    N = length(weights)
-    points = strategy.points
-
-    difference = (maxT - minT) / N
-
-    data = Float64[]
-    for (index, item) in enumerate(weights)
-        temp_data = rand(1, trunc(Int, points * item)) .* difference .+ minT .+
-                    ((index - 1) * difference)
-        data = append!(data, temp_data)
-    end
-
-    if Tar.dataset isa Vector{Nothing}
-        t = data
-    else
-        t = vcat(data,
-            Tar.dataset[end])
-    end
-
-    sum(innerdiff(Tar, f, autodiff, t, θ,
-        ode_params))
-end
-
-"""
-MvNormal likelihood at each `ti` in time `t` for ODE collocation residue with NN with parameters θ 
-"""
-function innerdiff(Tar::LogTargetDensity, f, autodiff::Bool, t::AbstractVector, θ,
-    ode_params)
-
-    # Tar used for phi and LogTargetDensity object attributes access
-    out = Tar(t, θ[1:(length(θ) - Tar.extraparams)])
-
-    # # reject samples case(write clear reason why)
-    if any(isinf, out[:, 1]) || any(isinf, ode_params)
-        return -Inf
-    end
-
-    # this is a vector{vector{dx,dy}}(handle case single u(float passed))
-    if length(out[:, 1]) == 1
-        physsol = [f(out[:, i][1],
-            ode_params,
-            t[i])
-                   for i in 1:length(out[1, :])]
-    else
-        physsol = [f(out[:, i],
-            ode_params,
-            t[i])
-                   for i in 1:length(out[1, :])]
-    end
-    physsol = reduce(hcat, physsol)
-
-    nnsol = NNodederi(Tar, t, θ[1:(length(θ) - Tar.extraparams)], autodiff)
-
-    vals = nnsol .- physsol
-
-    # N dimensional vector if N outputs for NN(each row has logpdf of i[i] where u is vector of dependant variables)
-    return [logpdf(MvNormal(vals[i, :],
-            LinearAlgebra.Diagonal(map(abs2,
-                Tar.phystd[i] .*
-                ones(length(vals[i, :]))))),
-        zeros(length(vals[i, :]))) for i in 1:length(Tar.prob.u0)]
-end
-
-"""
-prior logpdf for NN parameters + ODE constants
-"""
-function priorweights(Tar::LogTargetDensity, θ)
-    allparams = Tar.priors
-    # nn weights
-    nnwparams = allparams[1]
-
-    if Tar.extraparams > 0
-        # Vector of ode parameters priors
-        invpriors = allparams[2:end]
-
-        invlogpdf = sum(logpdf(invpriors[length(θ) - i + 1], θ[i])
-                        for i in (length(θ) - Tar.extraparams + 1):length(θ); init = 0.0)
-
-        return (invlogpdf
-                +
-                logpdf(nnwparams, θ[1:(length(θ) - Tar.extraparams)]))
-    else
-        return logpdf(nnwparams, θ)
-    end
-end
-
-function generate_Tar(chain::Lux.AbstractExplicitLayer, init_params)
-    θ, st = Lux.setup(Random.default_rng(), chain)
-    return init_params, chain, st
-end
-
-function generate_Tar(chain::Lux.AbstractExplicitLayer, init_params::Nothing)
-    θ, st = Lux.setup(Random.default_rng(), chain)
-    return θ, chain, st
-end
-
-function generate_Tar(chain::Flux.Chain, init_params)
-    θ, re = Flux.destructure(chain)
-    return init_params, re, nothing
-end
-
-function generate_Tar(chain::Flux.Chain, init_params::Nothing)
-    θ, re = Flux.destructure(chain)
-    # find_good_stepsize,phasepoint takes only float64
-    return θ, re, nothing
-end
-
-"""
-nn OUTPUT AT t,θ ~ phi(t,θ)
-"""
-function (f::LogTargetDensity{C, S})(t::AbstractVector,
-    θ) where {C <: Optimisers.Restructure, S}
-    f.prob.u0 .+ (t' .- f.prob.tspan[1]) .* f.chain(θ)(adapt(parameterless_type(θ), t'))
-end
-
-function (f::LogTargetDensity{C, S})(t::AbstractVector,
-    θ) where {C <: Lux.AbstractExplicitLayer, S}
-    θ = vector_to_parameters(θ, f.init_params)
-    y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), t'), θ, f.st)
-    ChainRulesCore.@ignore_derivatives f.st = st
-    f.prob.u0 .+ (t' .- f.prob.tspan[1]) .* y
-end
-
-function (f::LogTargetDensity{C, S})(t::Number,
-    θ) where {C <: Optimisers.Restructure, S}
-    #  must handle paired odes hence u0 broadcasted
-    f.prob.u0 .+ (t - f.prob.tspan[1]) * f.chain(θ)(adapt(parameterless_type(θ), [t]))
-end
-
-function (f::LogTargetDensity{C, S})(t::Number,
-    θ) where {C <: Lux.AbstractExplicitLayer, S}
-    θ = vector_to_parameters(θ, f.init_params)
-    y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), [t]), θ, f.st)
-    ChainRulesCore.@ignore_derivatives f.st = st
-    f.prob.u0 .+ (t .- f.prob.tspan[1]) .* y
-end
-
-"""
-similar to ode_dfdx() in NNODE/ode_solve.jl
-"""
-function NNodederi(phi::LogTargetDensity, t::AbstractVector, θ, autodiff::Bool)
-    if autodiff
-        hcat(ForwardDiff.derivative.(ti -> phi(ti, θ), t)...)
-    else
-        (phi(t .+ sqrt(eps(eltype(t))), θ) - phi(t, θ)) ./ sqrt(eps(eltype(t)))
-    end
-end
-
-function kernelchoice(Kernel, max_depth, Δ_max, n_leapfrog, δ, λ)
-    if Kernel == HMC
-        Kernel(n_leapfrog)
-    elseif Kernel == HMCDA
-        Kernel(δ, λ)
-    else
-        Kernel(δ, max_depth = max_depth, Δ_max = Δ_max)
-    end
-end
-
-function integratorchoice(Integrator, initial_ϵ, jitter_rate,
-    tempering_rate)
-    if Integrator == JitteredLeapfrog
-        Integrator(initial_ϵ, jitter_rate)
-    elseif Integrator == TemperedLeapfrog
-        Integrator(initial_ϵ, tempering_rate)
-    else
-        Integrator(initial_ϵ)
-    end
-end
-
-function adaptorchoice(Adaptor, mma, ssa)
-    if Adaptor != AdvancedHMC.NoAdaptation()
-        Adaptor(mma, ssa)
-    else
-        AdvancedHMC.NoAdaptation()
-    end
-end
-
-"""
-```julia
-ahmc_bayesian_pinn_ode(prob, chain; strategy = GridTraining,
-                    dataset = [nothing],init_params = nothing, 
-                    draw_samples = 1000, physdt = 1 / 20.0f0,l2std = [0.05],
-                    phystd = [0.05], priorsNNw = (0.0, 2.0),
-                    param = [],nchains = 1,autodiff = false, Kernel = HMC,
-                    Integrator = Leapfrog, Adaptor = StanHMCAdaptor,
-                    targetacceptancerate = 0.8, Metric = DiagEuclideanMetric,
-                    jitter_rate = 3.0, tempering_rate = 3.0, max_depth = 10,
-                    Δ_max = 1000, n_leapfrog = 10, δ = 0.65, λ = 0.3,
-                    progress = false,verbose = false)
-```
-!!! warn
-
-    Note that ahmc_bayesian_pinn_ode() only supports ODEs which are written in the out-of-place form, i.e.
-    `du = f(u,p,t)`, and not `f(du,u,p,t)`. If not declared out-of-place, then the ahmc_bayesian_pinn_ode()
-    will exit with an error.
-
-## Example
-linear = (u, p, t) -> -u / p[1] + exp(t / p[2]) * cos(t)
-tspan = (0.0, 10.0)
-u0 = 0.0
-p = [5.0, -5.0]
-prob = ODEProblem(linear, u0, tspan, p)
-
-# CREATE DATASET (Necessity for accurate Parameter estimation)
-sol = solve(prob, Tsit5(); saveat = 0.05)
-u = sol.u[1:100]
-time = sol.t[1:100]
-
-# dataset and BPINN create
-x̂ = collect(Float64, Array(u) + 0.05 * randn(size(u)))
-dataset = [x̂, time]
-
-chainflux1 = Flux.Chain(Flux.Dense(1, 5, tanh), Flux.Dense(5, 5, tanh), Flux.Dense(5, 1)
-
-# simply solving ode here hence better to not pass dataset(uses ode params specified in prob)
-fh_mcmc_chainflux1, fhsamplesflux1, fhstatsflux1 = ahmc_bayesian_pinn_ode(prob,chainflux1,
-                                                                          dataset = dataset,
-                                                                          draw_samples = 1500,
-                                                                          l2std = [0.05],
-                                                                          phystd = [0.05],
-                                                                          priorsNNw = (0.0,3.0))
-
-# solving ode + estimating parameters hence dataset needed to optimize parameters upon + Pior Distributions for ODE params
-fh_mcmc_chainflux2, fhsamplesflux2, fhstatsflux2 = ahmc_bayesian_pinn_ode(prob,chainflux1,
-                                                                          dataset = dataset,
-                                                                          draw_samples = 1500,
-                                                                          l2std = [0.05],
-                                                                          phystd = [0.05],
-                                                                          priorsNNw = (0.0,3.0),
-                                                                          param = [Normal(6.5,0.5),Normal(-3,0.5)])
-
-## NOTES 
-Dataset is required for accurate Parameter estimation + solving equations
-Incase you are only solving the Equations for solution, do not provide dataset
-
-## Positional Arguments
-* `prob`: DEProblem(out of place and the function signature should be f(u,p,t)
-* `chain`: Lux/Flux Neural Netork which would be made the Bayesian PINN
-
-## Keyword Arguments
-* `strategy`: The training strategy used to choose the points for the evaluations. By default GridTraining is used with given physdt discretization.
-* `dataset`: Vector containing Vectors of corresponding u,t values 
-* `init_params`: intial parameter values for BPINN (ideally for multiple chains different initializations preferred)
-* `nchains`: number of chains you want to sample (random initialisation of params by default)
-* `draw_samples`: number of samples to be drawn in the MCMC algorithms (warmup samples are ~2/3 of draw samples)
-* `l2std`: standard deviation of BPINN predicition against L2 losses/Dataset
-* `phystd`: standard deviation of BPINN predicition against Chosen Underlying ODE System
-* `priorsNNw`: Vector of [mean, std] for BPINN parameter. Weights and Biases of BPINN are Normal Distributions by default
-* `param`: Vector of chosen ODE parameters Distributions in case of Inverse problems.
-* `autodiff`: Boolean Value for choice of Derivative Backend(default is numerical)
-* `physdt`: Timestep for approximating ODE in it's Time domain. (1/20.0 by default)
-
-# AHMC.jl is still developing convenience structs so might need changes on new releases.
-* `Kernel`: Choice of MCMC Sampling Algorithm (AdvancedHMC.jl implemenations HMC/NUTS/HMCDA)
-* `targetacceptancerate`: Target percentage(in decimal) of iterations in which the proposals were accepted(0.8 by default)
-* `Integrator(jitter_rate, tempering_rate), Metric, Adaptor`: https://turinglang.org/AdvancedHMC.jl/stable/
-* `max_depth`: Maximum doubling tree depth (NUTS)
-* `Δ_max`: Maximum divergence during doubling tree (NUTS)
-* `n_leapfrog`: number of leapfrog steps for HMC
-* `δ`: target acceptance probability for NUTS/HMCDA
-* `λ`: target trajectory length for HMCDA
-* `progress`: controls whether to show the progress meter or not.
-* `verbose`: controls the verbosity. (Sample call args in AHMC)
-
-"""
-
-"""
-dataset would be (x̂,t)
-priors: pdf for W,b + pdf for ODE params
-"""
-function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain;
-    strategy = GridTraining, dataset = [nothing],
-    init_params = nothing, draw_samples = 1000,
-    physdt = 1 / 20.0, l2std = [0.05],
-    phystd = [0.05], priorsNNw = (0.0, 2.0),
-    param = [], nchains = 1, autodiff = false,
-    Kernel = HMC, Integrator = Leapfrog,
-    Adaptor = StanHMCAdaptor, targetacceptancerate = 0.8,
-    Metric = DiagEuclideanMetric, jitter_rate = 3.0,
-    tempering_rate = 3.0, max_depth = 10, Δ_max = 1000,
-    n_leapfrog = 10, δ = 0.65, λ = 0.3, progress = false,
-    verbose = false)
-
-    # NN parameter prior mean and variance(PriorsNN must be a tuple)
-    if isinplace(prob)
-        throw(error("The BPINN ODE solver only supports out-of-place ODE definitions, i.e. du=f(u,p,t)."))
-    end
-
-    strategy = strategy == GridTraining ? strategy(physdt) : strategy
-
-    if dataset != [nothing] &&
-       (length(dataset) < 2 || !(typeof(dataset) <: Vector{<:Vector{<:AbstractFloat}}))
-        throw(error("Invalid dataset. dataset would be timeseries (x̂,t) where type: Vector{Vector{AbstractFloat}"))
-    end
-
-    if dataset != [nothing] && param == []
-        println("Dataset is only needed for Parameter Estimation + Forward Problem, not in only Forward Problem case.")
-    elseif dataset == [nothing] && param != []
-        throw(error("Dataset Required for Parameter Estimation."))
-    end
-
-    if chain isa Lux.AbstractExplicitLayer || chain isa Flux.Chain
-        # Flux-vector, Lux-Named Tuple
-        initial_nnθ, recon, st = generate_Tar(chain, init_params)
-    else
-        error("Only Lux.AbstractExplicitLayer and Flux.Chain neural networks are supported")
-    end
-
-    if nchains > Threads.nthreads()
-        throw(error("number of chains is greater than available threads"))
-    elseif nchains < 1
-        throw(error("number of chains must be greater than 1"))
-    end
-
-    # eltype(physdt) cause needs Float64 for find_good_stepsize
-    if chain isa Lux.AbstractExplicitLayer
-        # Lux chain(using component array later as vector_to_parameter need namedtuple)
-        initial_θ = collect(eltype(physdt),
-            vcat(ComponentArrays.ComponentArray(initial_nnθ)))
-    else
-        initial_θ = collect(eltype(physdt), initial_nnθ)
-    end
-
-    # adding ode parameter estimation
-    nparameters = length(initial_θ)
-    ninv = length(param)
-    priors = [
-        MvNormal(priorsNNw[1] * ones(nparameters),
-            LinearAlgebra.Diagonal(map(abs2, priorsNNw[2] .* ones(nparameters)))),
-    ]
-
-    # append Ode params to all paramvector
-    if ninv > 0
-        # shift ode params(initialise ode params by prior means)
-        initial_θ = vcat(initial_θ, [Distributions.params(param[i])[1] for i in 1:ninv])
-        priors = vcat(priors, param)
-        nparameters += ninv
-    end
-
-    t0 = prob.tspan[1]
-    # dimensions would be total no of params,initial_nnθ for Lux namedTuples
-    ℓπ = LogTargetDensity(nparameters, prob, recon, st, strategy, dataset, priors,
-        phystd, l2std, autodiff, physdt, ninv, initial_nnθ)
-
-    try
-        ℓπ(t0, initial_θ[1:(nparameters - ninv)])
-    catch err
-        if isa(err, DimensionMismatch)
-            throw(DimensionMismatch("Dimensions of the initial u0 and chain should match"))
-        else
-            throw(err)
-        end
-    end
-
-    # Define Hamiltonian system (nparameters ~ dimensionality of the sampling space)
-    metric = Metric(nparameters)
-    hamiltonian = Hamiltonian(metric, ℓπ, ForwardDiff)
-
-    println("physics Logpdf is : ", physloglikelihood(ℓπ, initial_θ))
-    println("prior Logpdf is : ", priorweights(ℓπ, initial_θ))
-    println("L2lossData Logpdf is : ", L2LossData(ℓπ, initial_θ))
-    println("L2loss2 Logpdf is : ", L2loss2(ℓπ, initial_θ))
-
-    # parallel sampling option
-    if nchains != 1
-        # Cache to store the chains
-        chains = Vector{Any}(undef, nchains)
-        statsc = Vector{Any}(undef, nchains)
-        samplesc = Vector{Any}(undef, nchains)
-
-        Threads.@threads for i in 1:nchains
-            # each chain has different initial NNparameter values(better posterior exploration)
-            initial_θ = vcat(randn(nparameters - ninv),
-                initial_θ[(nparameters - ninv + 1):end])
-            initial_ϵ = find_good_stepsize(hamiltonian, initial_θ)
-            integrator = integratorchoice(Integrator, initial_ϵ, jitter_rate,
-                tempering_rate)
-            adaptor = adaptorchoice(Adaptor, MassMatrixAdaptor(metric),
-                StepSizeAdaptor(targetacceptancerate, integrator))
-            Kernel = AdvancedHMC.make_kernel(kernelchoice(Kernel, max_depth, Δ_max,
-                    n_leapfrog, δ, λ), integrator)
-            samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples, adaptor;
-                progress = progress, verbose = verbose)
-
-            samplesc[i] = samples
-            statsc[i] = stats
-            mcmc_chain = Chains(hcat(samples...)')
-            chains[i] = mcmc_chain
-        end
-
-        return chains, samplesc, statsc
-    else
-        initial_ϵ = find_good_stepsize(hamiltonian, initial_θ)
-        integrator = integratorchoice(Integrator, initial_ϵ, jitter_rate, tempering_rate)
-        adaptor = adaptorchoice(Adaptor, MassMatrixAdaptor(metric),
-            StepSizeAdaptor(targetacceptancerate, integrator))
-        Kernel = AdvancedHMC.make_kernel(kernelchoice(Kernel, max_depth, Δ_max, n_leapfrog,
-                δ, λ), integrator)
-        samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples,
-            adaptor; progress = progress, verbose = verbose)
-
-        # return a chain(basic chain),samples and stats
-        matrix_samples = hcat(samples...)
-        mcmc_chain = MCMCChains.Chains(matrix_samples')
-        return mcmc_chain, samples, stats
-    end
-end
\ No newline at end of file
diff --git a/test/BPINN_newform.jl b/test/BPINN_newform.jl
deleted file mode 100644
index fa2f04073e..0000000000
--- a/test/BPINN_newform.jl
+++ /dev/null
@@ -1,4354 +0,0 @@
-# # Testing Code
-using Test, MCMCChains
-using ForwardDiff, Distributions, OrdinaryDiffEq
-using Flux, OptimizationOptimisers, AdvancedHMC, Lux
-using Statistics, Random, Functors, ComponentArrays
-using NeuralPDE, MonteCarloMeasurements
-
-# note that current testing bounds can be easily further tightened but have been inflated for support for Julia build v1
-# on latest Julia version it performs much better for below tests
-Random.seed!(100)
-
-# for sampled params->lux ComponentArray
-function vector_to_parameters(ps_new::AbstractVector, ps::NamedTuple)
-    @assert length(ps_new) == Lux.parameterlength(ps)
-    i = 1
-    function get_ps(x)
-        z = reshape(view(ps_new, i:(i + length(x) - 1)), size(x))
-        i += length(x)
-        return z
-    end
-    return Functors.fmap(get_ps, ps)
-end
-
-## PROBLEM-1 (WITHOUT PARAMETER ESTIMATION)
-linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π)
-linear = (u, p, t) -> cos(2 * π * t)
-tspan = (0.0, 2.0)
-u0 = 0.0
-prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan)
-p = prob.p
-
-# Numerical and Analytical Solutions: testing ahmc_bayesian_pinn_ode()
-ta = range(tspan[1], tspan[2], length = 300)
-u = [linear_analytic(u0, nothing, ti) for ti in ta]
-x̂ = collect(Float64, Array(u) + 0.02 * randn(size(u)))
-time = vec(collect(Float64, ta))
-physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# testing points for solve() call must match saveat(1/50.0) arg
-ta0 = range(tspan[1], tspan[2], length = 101)
-u1 = [linear_analytic(u0, nothing, ti) for ti in ta0]
-x̂1 = collect(Float64, Array(u1) + 0.02 * randn(size(u1)))
-time1 = vec(collect(Float64, ta0))
-physsol0_1 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
-
-chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64
-chainlux = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1))
-init1, re1 = destructure(chainflux)
-θinit, st = Lux.setup(Random.default_rng(), chainlux)
-
-fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chainflux,
-    draw_samples = 2500,
-    n_leapfrog = 30)
-
-fh_mcmc_chain2, fhsamples2, fhstats2 = ahmc_bayesian_pinn_ode(prob, chainlux,
-    draw_samples = 2500,
-    n_leapfrog = 30)
-
-# can change training strategies by adding this to call (Quadratuer and GridTraining show good results but stochastics sampling techniques perform bad)
-# strategy = QuadratureTraining(; quadrature_alg = QuadGKJL(),
-#     reltol = 1e-6,
-#     abstol = 1e-3, maxiters = 1000,
-#     batch = 0)
-
-alg = NeuralPDE.BNNODE(chainflux, draw_samples = 2500,
-    n_leapfrog = 30)
-sol1flux = solve(prob, alg)
-
-alg = NeuralPDE.BNNODE(chainlux, draw_samples = 2500,
-    n_leapfrog = 30)
-sol1lux = solve(prob, alg)
-
-# testing points
-t = time
-# Mean of last 500 sampled parameter's curves(flux and lux chains)[Ensemble predictions]
-out = re1.(fhsamples1[(end - 500):end])
-yu = collect(out[i](t') for i in eachindex(out))
-fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)]
-meanscurve1 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean
-
-θ = [vector_to_parameters(fhsamples1[i], θinit) for i in 2000:2500]
-luxar = [chainlux(t', θ[i], st)[1] for i in 1:500]
-luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
-meanscurve2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
-
-# --------------------- ahmc_bayesian_pinn_ode() call
-@test mean(abs.(x̂ .- meanscurve1)) < 0.05
-@test mean(abs.(physsol1 .- meanscurve1)) < 0.005
-@test mean(abs.(x̂ .- meanscurve2)) < 0.05
-@test mean(abs.(physsol1 .- meanscurve2)) < 0.005
-
-#--------------------- solve() call 
-@test mean(abs.(x̂1 .- sol1flux.ensemblesol[1])) < 0.05
-@test mean(abs.(physsol0_1 .- sol1flux.ensemblesol[1])) < 0.05
-@test mean(abs.(x̂1 .- sol1lux.ensemblesol[1])) < 0.05
-@test mean(abs.(physsol0_1 .- sol1lux.ensemblesol[1])) < 0.05
-
-## PROBLEM-1 (WITH PARAMETER ESTIMATION)
-linear_analytic = (u0, p, t) -> u0 + sin(p * t) / (p)
-linear = (u, p, t) -> cos(p * t)
-tspan = (0.0, 2.0)
-u0 = 0.0
-p = 2 * pi
-prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan, p)
-
-# Numerical and Analytical Solutions
-sol1 = solve(prob, Tsit5(); saveat = 0.01)
-u = sol1.u
-time = sol1.t
-
-# BPINN AND TRAINING DATASET CREATION(dataset must be defined only inside problem timespan!)
-ta = range(tspan[1], tspan[2], length = 25)
-u = [linear_analytic(u0, p, ti) for ti in ta]
-x̂ = collect(Float64, Array(u) .+ (0.2 .* Array(u) .* randn(size(u))))
-time = vec(collect(Float64, ta))
-dataset = [x̂, time]
-physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# testing points for solve call(saveat=1/50.0 ∴ at t = collect(eltype(saveat), prob.tspan[1]:saveat:prob.tspan[2] internally estimates)
-ta0 = range(tspan[1], tspan[2], length = 101)
-u1 = [linear_analytic(u0, p, ti) for ti in ta0]
-x̂1 = collect(Float64, Array(u1) + 0.2 * randn(size(u1)))
-time1 = vec(collect(Float64, ta0))
-physsol1_1 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
-
-using Plots, StatsPlots
-# plot(dataset[2], calderivatives(dataset)')
-yu = collect(prob.tspan[1]:(1 / 50.0):prob.tspan[2])
-plot(yu, [linear_analytic(u0, p, t) for t in yu])
-chainflux1 = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64
-chainlux1 = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1))
-init1, re1 = destructure(chainflux1)
-θinit, st = Lux.setup(Random.default_rng(), chainlux1)
-
-fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chainflux1,
-    dataset = dataset,
-    draw_samples = 2500,
-    physdt = 1 / 50.0f0,
-    priorsNNw = (0.0,
-        3.0),
-    param = [
-        LogNormal(9,
-            0.5),
-    ],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30)
-
-fh_mcmc_chain2, fhsamples2, fhstats2 = ahmc_bayesian_pinn_ode(prob, chainlux1,
-    dataset = dataset,
-    draw_samples = 2500,
-    physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30)
-
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 10.0),
-    l2std = [0.005], phystd = [0.01],
-    param = [Normal(11, 6)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30)
-# original paper (pure data 0 1)
-sol1flux = solve(prob, alg)
-sol1flux.estimated_ode_params
-# pure data method 1 1
-sol2flux = solve(prob, alg)
-sol2flux.estimated_ode_params
-# pure data method 1 0
-sol3flux = solve(prob, alg)
-sol3flux.estimated_ode_params
-# deri collocation
-sol4flux = solve(prob, alg)
-sol4flux.estimated_ode_params
-# collocation
-sol5flux = solve(prob, alg)
-sol5flux.estimated_ode_params
-# collocation + L2Data loss(at 9,0.5 1,2 gives same)
-sol6flux = solve(prob, alg)
-sol6flux.estimated_ode_params
-# 2500 iters
-sol7flux = solve(prob, alg)
-sol7flux.estimated_ode_params
-
-plotly()
-plot!(yu, sol1flux.ensemblesol[1])
-plot!(yu, sol2flux.ensemblesol[1])
-plot!(yu, sol3flux.ensemblesol[1])
-plot!(yu, sol4flux.ensemblesol[1])
-plot!(yu, sol5flux.ensemblesol[1])
-plot!(yu, sol6flux.ensemblesol[1])
-
-plot!(dataset[2], dataset[1])
-
-# plot!(sol4flux.ensemblesol[1])
-# plot!(sol5flux.ensemblesol[1])
-
-sol2flux.estimated_ode_params
-
-sol1flux.estimated_ode_params
-
-sol3flux.estimated_ode_params
-
-sol4flux.estimated_ode_params
-
-sol5flux.estimated_ode_params
-
-alg = NeuralPDE.BNNODE(chainlux1, dataset = dataset,
-    draw_samples = 2500,
-    physdt = 1 / 50.0f0,
-    priorsNNw = (0.0,
-        3.0),
-    param = [
-        LogNormal(9,
-            0.5),
-    ],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30)
-
-sol2lux = solve(prob, alg)
-
-# testing points
-t = time
-# Mean of last 500 sampled parameter's curves(flux and lux chains)[Ensemble predictions]
-out = re1.([fhsamples1[i][1:22] for i in 2000:2500])
-yu = collect(out[i](t') for i in eachindex(out))
-fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)]
-meanscurve1 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean
-
-θ = [vector_to_parameters(fhsamples2[i][1:(end - 1)], θinit) for i in 2000:2500]
-luxar = [chainlux1(t', θ[i], st)[1] for i in 1:500]
-luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
-meanscurve2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
-
-# --------------------- ahmc_bayesian_pinn_ode() call  
-@test mean(abs.(physsol1 .- meanscurve1)) < 0.15
-@test mean(abs.(physsol1 .- meanscurve2)) < 0.15
-
-# ESTIMATED ODE PARAMETERS (NN1 AND NN2)
-@test abs(p - mean([fhsamples2[i][23] for i in 2000:2500])) < abs(0.25 * p)
-@test abs(p - mean([fhsamples1[i][23] for i in 2000:2500])) < abs(0.25 * p)
-
-#-------------------------- solve() call  
-@test mean(abs.(physsol1_1 .- sol2flux.ensemblesol[1])) < 8e-2
-@test mean(abs.(physsol1_1 .- sol2lux.ensemblesol[1])) < 8e-2
-
-# ESTIMATED ODE PARAMETERS (NN1 AND NN2)
-@test abs(p - sol1flux.estimated_ode_params[1]) < abs(0.15 * p)
-@test abs(p - sol2lux.estimated_ode_params[1]) < abs(0.15 * p)
-
-## PROBLEM-2
-linear = (u, p, t) -> u / p + exp(t / p) * cos(t)
-tspan = (0.0, 10.0)
-u0 = 0.0
-p = -5.0
-prob = ODEProblem(linear, u0, tspan, p)
-linear_analytic = (u0, p, t) -> exp(t / p) * (u0 + sin(t))
-
-# SOLUTION AND CREATE DATASET
-sol = solve(prob, Tsit5(); saveat = 0.1)
-u = sol.u
-time = sol.t
-x̂ = u .+ (u .* 0.2) .* randn(size(u))
-dataset = [x̂, time]
-t = sol.t
-physsol1 = [linear_analytic(prob.u0, p, t[i]) for i in eachindex(t)]
-
-ta0 = range(tspan[1], tspan[2], length = 501)
-u1 = [linear_analytic(u0, p, ti) for ti in ta0]
-time1 = vec(collect(Float64, ta0))
-physsol2 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
-
-chainflux12 = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 6, tanh),
-    Flux.Dense(6, 1)) |> Flux.f64
-chainlux12 = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh), Lux.Dense(6, 1))
-init1, re1 = destructure(chainflux12)
-θinit, st = Lux.setup(Random.default_rng(), chainlux12)
-
-using Flux
-using Random
-
-function derivatives(chainflux, dataset)
-    loss(x, y) = Flux.mse(chainflux(x), y)
-    optimizer = Flux.Optimise.ADAM(0.01)
-    epochs = 2500
-    for epoch in 1:epochs
-        Flux.train!(loss, Flux.params(chainflux), [(dataset[2]', dataset[1]')], optimizer)
-    end
-    getgradient(chainflux, dataset)
-end
-
-function getgradient(chainflux, dataset)
-    return (chainflux(dataset[end]' .+ sqrt(eps(eltype(Float64)))) .-
-            chainflux(dataset[end]')) ./
-           sqrt(eps(eltype(dataset[end][1])))
-end
-
-ans = derivatives(chainflux12, dataset)
-
-init3, re = destructure(chainflux12)
-init2 == init1
-init3 == init2
-plot!(dataset[end], ans')
-plot!(dataset[end], chainflux12(dataset[end]')')
-
-ars = getgradient(chainflux12, dataset)
-
-plot!(dataset[end], ars')
-
-fh_mcmc_chainflux12, fhsamplesflux12, fhstatsflux12 = ahmc_bayesian_pinn_ode(prob,
-    chainflux12,
-    draw_samples = 1500,
-    l2std = [0.03],
-    phystd = [
-        0.03],
-    priorsNNw = (0.0,
-        10.0),
-    n_leapfrog = 30)
-
-fh_mcmc_chainflux22, fhsamplesflux22, fhstatsflux22 = ahmc_bayesian_pinn_ode(prob,
-    chainflux12,
-    dataset = dataset,
-    draw_samples = 1500,
-    l2std = [0.03],
-    phystd = [
-        0.03,
-    ],
-    priorsNNw = (0.0,
-        10.0),
-    param = [
-        Normal(-7,
-            4),
-    ],
-    n_leapfrog = 30)
-
-fh_mcmc_chainlux12, fhsampleslux12, fhstatslux12 = ahmc_bayesian_pinn_ode(prob, chainlux12,
-    draw_samples = 1500,
-    l2std = [0.03],
-    phystd = [0.03],
-    priorsNNw = (0.0,
-        10.0),
-    n_leapfrog = 30)
-
-fh_mcmc_chainlux22, fhsampleslux22, fhstatslux22 = ahmc_bayesian_pinn_ode(prob, chainlux12,
-    dataset = dataset,
-    draw_samples = 1500,
-    l2std = [0.03],
-    phystd = [0.03],
-    priorsNNw = (0.0,
-        10.0),
-    param = [
-        Normal(-7,
-            4),
-    ],
-    n_leapfrog = 30)
-
-alg1 = NeuralPDE.BNNODE(chainflux12,
-    dataset = dataset,
-    draw_samples = 500,
-    l2std = [0.01],
-    phystd = [
-        0.03,
-    ],
-    priorsNNw = (0.0,
-        10.0),
-    param = [
-        Normal(-7,
-            4),
-    ],
-    n_leapfrog = 30, progress = true)
-
-# original paper (pure data 0 1)
-sol1flux_pestim = solve(prob, alg1)
-sol1flux_pestim.estimated_ode_params
-# pure data method 1 1
-sol2flux_pestim = solve(prob, alg1)
-sol2flux_pestim.estimated_ode_params
-# pure data method 1 0
-sol3flux_pestim = solve(prob, alg1)
-sol3flux_pestim.estimated_ode_params
-# deri collocation
-sol4flux_pestim = solve(prob, alg1)
-sol4flux_pestim.estimated_ode_params
-# collocation
-sol5flux_pestim = solve(prob, alg1)
-sol5flux_pestim.estimated_ode_params
-# collocation + L2Data loss(at 9,0.5 1,2 gives same)
-sol6flux_pestim = solve(prob, alg1)
-sol6flux_pestim.estimated_ode_params
-
-using Plots, StatsPlots
-ars = collect(prob.tspan[1]:(1 / 50.0):prob.tspan[2])
-plot(time, u)
-plot!(ars, sol1flux_pestim.ensemblesol[1])
-plot!(ars, sol2flux_pestim.ensemblesol[1])
-plot!(ars, sol3flux_pestim.ensemblesol[1])
-plot!(ars, sol4flux_pestim.ensemblesol[1])
-plot!(ars, sol5flux_pestim.ensemblesol[1])
-plot!(ars, sol6flux_pestim.ensemblesol[1])
-
-sol3flux_pestim.estimated_ode_params
-
-sol4flux_pestim.estimated_ode_params
-
-sol5flux_pestim.estimated_ode_params
-
-sol6flux_pestim.estimated_ode_params
-
-ars = collect(prob.tspan[1]:(1 / 50.0):prob.tspan[2])
-
-init, re1 = destructure(chainflux12)
-init
-init1
-alg = NeuralPDE.BNNODE(chainlux12,
-    dataset = dataset,
-    draw_samples = 1500,
-    l2std = [0.03],
-    phystd = [0.03],
-    priorsNNw = (0.0,
-        10.0),
-    param = [
-        Normal(-7,
-            4),
-    ],
-    n_leapfrog = 30)
-
-sol3lux_pestim = solve(prob, alg)
-
-# testing timepoints
-t = sol.t
-#------------------------------ ahmc_bayesian_pinn_ode() call 
-# Mean of last 500 sampled parameter's curves(flux chains)[Ensemble predictions]
-out = re1.([fhsamplesflux12[i][1:61] for i in 1000:1500])
-yu = [out[i](t') for i in eachindex(out)]
-fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)]
-meanscurve1_1 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean
-
-out = re1.([fhsamplesflux22[i][1:61] for i in 1000:1500])
-yu = [out[i](t') for i in eachindex(out)]
-fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)]
-meanscurve1_2 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean
-
-@test mean(abs.(sol.u .- meanscurve1_1)) < 1e-2
-@test mean(abs.(physsol1 .- meanscurve1_1)) < 1e-2
-@test mean(abs.(sol.u .- meanscurve1_2)) < 5e-2
-@test mean(abs.(physsol1 .- meanscurve1_2)) < 5e-2
-
-# estimated parameters(flux chain)
-param1 = mean(i[62] for i in fhsamplesflux22[1000:1500])
-@test abs(param1 - p) < abs(0.3 * p)
-
-# Mean of last 500 sampled parameter's curves(lux chains)[Ensemble predictions]
-θ = [vector_to_parameters(fhsampleslux12[i], θinit) for i in 1000:1500]
-luxar = [chainlux12(t', θ[i], st)[1] for i in 1:500]
-luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
-meanscurve2_1 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
-
-θ = [vector_to_parameters(fhsampleslux22[i][1:(end - 1)], θinit) for i in 1000:1500]
-luxar = [chainlux12(t', θ[i], st)[1] for i in 1:500]
-luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
-meanscurve2_2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
-
-@test mean(abs.(sol.u .- meanscurve2_1)) < 1e-1
-@test mean(abs.(physsol1 .- meanscurve2_1)) < 1e-1
-@test mean(abs.(sol.u .- meanscurve2_2)) < 5e-2
-@test mean(abs.(physsol1 .- meanscurve2_2)) < 5e-2
-
-# estimated parameters(lux chain)
-param1 = mean(i[62] for i in fhsampleslux22[1000:1500])
-@test abs(param1 - p) < abs(0.3 * p)
-
-#-------------------------- solve() call 
-# (flux chain)
-@test mean(abs.(physsol2 .- sol3flux_pestim.ensemblesol[1])) < 0.15
-# estimated parameters(flux chain)
-param1 = sol3flux_pestim.estimated_ode_params[1]
-@test abs(param1 - p) < abs(0.45 * p)
-
-# (lux chain)
-@test mean(abs.(physsol2 .- sol3lux_pestim.ensemblesol[1])) < 0.15
-# estimated parameters(lux chain)
-param1 = sol3lux_pestim.estimated_ode_params[1]
-@test abs(param1 - p) < abs(0.45 * p)
-
-using Plots, StatsPlots
-using NoiseRobustDifferentiation, Weave, DataInterpolations
-
-# # ----------------------------------------------------------
-# # physdt=1/20, Full likelihood
-# # 25 points 
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
-#     draw_samples = 1500, physdt = 1 / 50.0f0, phystd = [0.01],
-#     l2std = [0.01],
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux1 = solve(prob, alg)
-# sol2flux1.estimated_ode_params[1] #6.41722 Particles{Float64, 1}, 6.02404 Particles{Float64, 1}
-# sol2flux2 = solve(prob, alg)
-# sol2flux2.estimated_ode_params[1] #6.42782 Particles{Float64, 1}, 6.07509 Particles{Float64, 1}
-# sol2flux3 = solve(prob, alg)
-# sol2flux3.estimated_ode_params[1] #6.42782 Particles{Float64, 1}, 6.00825 Particles{Float64, 1}
-
-# # 50 points 
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux11 = solve(prob, alg)
-# sol2flux11.estimated_ode_params[1] #5.71268 Particles{Float64, 1}, 6.07242 Particles{Float64, 1}
-# sol2flux22 = solve(prob, alg)
-# sol2flux22.estimated_ode_params[1] #5.74599 Particles{Float64, 1}, 6.04837 Particles{Float64, 1}
-# sol2flux33 = solve(prob, alg)
-# sol2flux33.estimated_ode_params[1] #5.74599 Particles{Float64, 1}, 6.02838 Particles{Float64, 1}
-
-# # 100 points 
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux111 = solve(prob, alg)
-# sol2flux111.estimated_ode_params[1] #6.59097 Particles{Float64, 1}, 5.89384 Particles{Float64, 1}
-# sol2flux222 = solve(prob, alg)
-# sol2flux222.estimated_ode_params[1] #6.62813 Particles{Float64, 1}, 5.88216 Particles{Float64, 1}
-# sol2flux333 = solve(prob, alg)
-# sol2flux333.estimated_ode_params[1] #6.62813 Particles{Float64, 1}, 5.85327 Particles{Float64, 1}
-
-# # ----------------------------------------------------------
-# # physdt=1/20, full likelihood cdm
-# # 25 points 
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux1_cdm = solve(prob, alg)
-# sol2flux1_cdm.estimated_ode_params[1]# 6.50506 Particles{Float64, 1} ,6.38963 Particles{Float64, 1}
-# sol2flux2_cdm = solve(prob, alg)
-# sol2flux2_cdm.estimated_ode_params[1] #6.50032 Particles{Float64, 1} ,6.39817 Particles{Float64, 1}
-# sol2flux3_cdm = solve(prob, alg)
-# sol2flux3_cdm.estimated_ode_params[1] #6.50032 Particles{Float64, 1} ,6.36296 Particles{Float64, 1}
-
-# # 50 points 
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux11_cdm = solve(prob, alg)
-# sol2flux11_cdm.estimated_ode_params[1] #6.52951 Particles{Float64, 1},5.15621 Particles{Float64, 1}
-# sol2flux22_cdm = solve(prob, alg)
-# sol2flux22_cdm.estimated_ode_params[1] #6.54988 Particles{Float64, 1},5.16363 Particles{Float64, 1}
-# sol2flux33_cdm = solve(prob, alg)
-# sol2flux33_cdm.estimated_ode_params[1] #6.54988 Particles{Float64, 1},5.15591 Particles{Float64, 1}
-
-# # 100 points 
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux111_cdm = solve(prob, alg)
-# sol2flux111_cdm.estimated_ode_params[1] #6.74338 Particles{Float64, 1}, 9.72422 Particles{Float64, 1}
-# sol2flux222_cdm = solve(prob, alg)
-# sol2flux222_cdm.estimated_ode_params[1] #6.72642 Particles{Float64, 1}, 9.71991 Particles{Float64, 1}
-# sol2flux333_cdm = solve(prob, alg)
-# sol2flux333_cdm.estimated_ode_params[1] #6.72642 Particles{Float64, 1}, 9.75045 Particles{Float64, 1}
-
-# --------------------------------------------------------------------------------------
-#                              NEW SERIES OF TESTS (IN ORDER OF EXECUTION)
-#  -------------------------------------------------------------------------------------
-# original paper implementaion
-# 25 points
-ta = range(tspan[1], tspan[2], length = 25)
-u = [linear_analytic(u0, p, ti) for ti in ta]
-x̂ = collect(Float64, u .+ 0.05 * randn(size(u)))
-time = vec(collect(Float64, ta))
-dataset1 = [x̂, time]
-physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-# scatter!(time, u)
-# dataset
-# scatter!(dataset1[2], dataset1[1])
-# plot(time, physsol1)
-
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux1_normal = solve(prob, alg)
-sol2flux1_normal.estimated_ode_params[1]  #7.70593 Particles{Float64, 1}, 6.36096 Particles{Float64, 1} | 6.45865 Particles{Float64, 1}
-sol2flux2_normal = solve(prob, alg)
-sol2flux2_normal.estimated_ode_params[1] #6.66347 Particles{Float64, 1}, 6.36974 Particles{Float64, 1} | 6.45865 Particles{Float64, 1}
-sol2flux3_normal = solve(prob, alg)
-sol2flux3_normal.estimated_ode_params[1] #6.84827 Particles{Float64, 1}, 6.29555 Particles{Float64, 1} | 6.39947 Particles{Float64, 1}
-
-# 50 points
-ta = range(tspan[1], tspan[2], length = 50)
-u = [linear_analytic(u0, p, ti) for ti in ta]
-x̂ = collect(Float64, Array(u) + 0.05 * randn(size(u)))
-time = vec(collect(Float64, ta))
-dataset2 = [x̂, time]
-physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux11_normal = solve(prob, alg)
-sol2flux11_normal.estimated_ode_params[1] #7.83577 Particles{Float64, 1},6.24652 Particles{Float64, 1} | 6.34495 Particles{Float64, 1}
-sol2flux22_normal = solve(prob, alg)
-sol2flux22_normal.estimated_ode_params[1] #6.49477 Particles{Float64, 1},6.2118 Particles{Float64, 1} | 6.32476 Particles{Float64, 1}
-sol2flux33_normal = solve(prob, alg)
-sol2flux33_normal.estimated_ode_params[1] #6.47421 Particles{Float64, 1},6.33687 Particles{Float64, 1} | 6.2448 Particles{Float64, 1}
-
-# 100 points
-ta = range(tspan[1], tspan[2], length = 100)
-u = [linear_analytic(u0, p, ti) for ti in ta]
-x̂ = collect(Float64, Array(u) + 0.05 * randn(size(u)))
-time = vec(collect(Float64, ta))
-dataset3 = [x̂, time]
-physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux111_normal = solve(prob, alg)
-sol2flux111_normal.estimated_ode_params[1] #5.96604 Particles{Float64, 1},5.99588 Particles{Float64, 1} | 6.19805 Particles{Float64, 1}
-sol2flux222_normal = solve(prob, alg)
-sol2flux222_normal.estimated_ode_params[1] #6.05432 Particles{Float64, 1},6.0768 Particles{Float64, 1} | 6.22948 Particles{Float64, 1}
-sol2flux333_normal = solve(prob, alg)
-sol2flux333_normal.estimated_ode_params[1] #6.08856 Particles{Float64, 1},5.94819 Particles{Float64, 1} | 6.2551 Particles{Float64, 1}
-
-# LOTKA VOLTERRA CASE 
-function lotka_volterra(u, p, t)
-    # Model parameters.
-    α, β, γ, δ = p
-    # Current state.
-    x, y = u
-
-    # Evaluate differential equations.
-    dx = (α - β * y) * x # prey
-    dy = (δ * x - γ) * y # predator
-
-    return [dx, dy]
-end
-
-# initial-value problem.
-u01 = [1.0, 1.0]
-p1 = [1.5, 1.0, 3.0, 1.0]
-tspan1 = (0.0, 6.0)
-prob1 = ODEProblem(lotka_volterra, u01, tspan1, p1)
-
-# chainlux = Lux.Chain(Lux.Dense(1, 7, Lux.tanh), Lux.Dense(7, 7, Lux.tanh), Lux.Dense(7, 2))
-chainflux1 = Flux.Chain(Flux.Dense(1, 8, tanh), Flux.Dense(8, 8, tanh), Flux.Dense(8, 2))
-
-#testing timepoints must match keyword arg `saveat`` timepoints of solve() call
-t1 = collect(Float64, prob1.tspan[1]:(1 / 50.0):prob1.tspan[2])
-
-# --------------------------------------------------------------------------
-# original paper implementaion lotka volterra
-# 31 points  
-solution1 = solve(prob1, Tsit5(); saveat = 0.1)
-time1 = solution1.t
-physsol1_1 = solution1.u
-u1 = hcat(solution1.u...)
-x1 = u1[1, :] .+ 0.3 .* u1[1, :] .* randn(length(u1[1, :]))
-y1 = u1[2, :] .+ 0.3 .* u1[2, :] .* randn(length(u1[2, :]))
-dataset2_1 = [x1, y1, time1]
-plot(dataset2_1[end], dataset2_1[1])
-plot!(dataset2_1[end], dataset2_1[2])
-plot!(time1, u1[1, :])
-plot!(time1, u1[2, :])
-
-alg1 = NeuralPDE.BNNODE(chainflux1,
-    dataset = dataset2_1,
-    draw_samples = 1000,
-    physdt = 1 / 20.0,
-    l2std = [
-        0.2,
-        0.2,
-    ],
-    phystd = [
-        0.5,
-        0.5,
-    ],
-    priorsNNw = (0.0,
-        10.0),
-    param = [
-        Normal(4,
-            3),
-        Normal(-2,
-            4),
-        Normal(0,
-            5),
-        Normal(2.5,
-            2)],
-    n_leapfrog = 30, progress = true)
-
-# original paper (pure data 0 1)
-sol1flux1_lotka = solve(prob1, alg1)
-sol1flux1_lotka.estimated_ode_params
-# pure data method 1 1
-sol2flux1_lotka = solve(prob1, alg1)
-sol2flux1_lotka.estimated_ode_params
-# pure data method 1 0
-sol3flux1_lotka = solve(prob1, alg1)
-sol3flux1_lotka.estimated_ode_params
-# deri collocation
-sol4flux1_lotka = solve(prob1, alg1)
-sol4flux1_lotka.estimated_ode_params
-# collocation
-sol5flux1_lotka = solve(prob1, alg1)
-sol5flux1_lotka.estimated_ode_params
-# collocation + L2Data loss(at 9,0.5 1,2 gives same)
-sol6flux1_lotka = solve(prob1, alg1)
-sol6flux1_lotka.estimated_ode_params
-
-sol7flux1_lotka = solve(prob1, alg1)
-sol7flux1_lotka.estimated_ode_params
-
-using Plots, StatsPlots
-plot(dataset2_1[3], u1[1, :])
-plot!(dataset2_1[3], u1[2, :])
-plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol5flux1_normal.ensemblesol[2])
-plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]),
-    sol1flux1_normal.ensemblesol[1],
-    legend = :outerbottomleft)
-sol1flux2_normal = solve(prob1, alg1)
-sol1flux2_normal.estimated_ode_params  #|
-sol1flux3_normal = solve(prob1, alg1)
-sol1flux3_normal.estimated_ode_params  #|
-sol1flux4_normal = solve(prob1, alg1)
-sol1flux4_normal.estimated_ode_params
-
-plotly()
-plot!(title = "yuh")
-plot!(dataset2_1[3], dataset2_1[1])
-plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol1flux1_normal.ensemblesol[1])
-plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol1flux2_normal.ensemblesol[1])
-plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol1flux3_normal.ensemblesol[2])
-plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol1flux4_normal.ensemblesol[1])
-plot(time1, u1[1, :])
-plot!(time1, u1[2, :])
-
-ars = chainflux1(dataset2_1[end]')
-plot(ars[1, :])
-plot!(ars[2, :])
-
-function calculate_derivatives(dataset)
-    u = dataset[1]
-    u1 = dataset[2]
-    t = dataset[end]
-    # control points
-    n = Int(floor(length(t) / 10))
-    # spline for datasetvalues(solution) 
-    # interp = BSplineApprox(u, t, 4, 10, :Uniform, :Uniform)
-    interp = CubicSpline(u, t)
-    interp1 = CubicSpline(u1, t)
-    # derrivatives interpolation
-    dx = t[2] - t[1]
-    time = collect(t[1]:dx:t[end])
-    smoothu = [interp(i) for i in time]
-    smoothu1 = [interp1(i) for i in time]
-    # derivative of the spline (must match function derivative) 
-    û = tvdiff(smoothu, 20, 0.5, dx = dx, ε = 1)
-    û1 = tvdiff(smoothu1, 20, 0.5, dx = dx, ε = 1)
-    # tvdiff(smoothu, 100, 0.035, dx = dx, ε = 1)
-    # FDM
-    # û1 = diff(u) / dx
-    # dataset[1] and smoothu are almost equal(rounding errors)
-    return û, û1
-    # return 1
-end
-
-ar = calculate_derivatives(dataset2_1)
-plot(ar[1])
-plot!(ar[2])
-
-# 61 points
-solution1 = solve(prob1, Tsit5(); saveat = 0.1)
-time1 = solution1.t
-physsol1_1 = solution1.u
-u1 = hcat(solution1.u...)
-x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
-y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
-dataset2_2 = [x1, y1, time1]
-
-alg1 = NeuralPDE.BNNODE(chainlux,
-    dataset = dataset2_2,
-    draw_samples = 1000,
-    l2std = [
-        0.1,
-        0.1,
-    ],
-    phystd = [
-        0.1,
-        0.1,
-    ],
-    priorsNNw = (0.0,
-        5.0),
-    param = [
-        LogNormal(1.5,
-            0.5),
-        LogNormal(1.2,
-            0.5),
-        LogNormal(3.3,
-            1),
-        LogNormal(1.4,
-            1)],
-    n_leapfrog = 30, progress = true)
-
-sol1flux11_normal = solve(prob1, alg1)
-sol1flux11_normal.estimated_ode_params #|
-sol1flux22_normal = solve(prob1, alg1)
-sol1flux22_normal.estimated_ode_params #|
-sol1flux33_normal = solve(prob1, alg1)
-sol1flux33_normal.estimated_ode_params #|
-
-# 121 points
-solution1 = solve(prob1, Tsit5(); saveat = 0.05)
-time1 = solution1.t
-physsol1_1 = solution1.u
-u1 = hcat(solution1.u...)
-x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
-y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
-dataset2_3 = [x1, y1, time1]
-
-alg1 = NeuralPDE.BNNODE(chainlux,
-    dataset = dataset2_3,
-    draw_samples = 1000,
-    l2std = [
-        0.1,
-        0.1,
-    ],
-    phystd = [
-        0.1,
-        0.1,
-    ],
-    priorsNNw = (0.0,
-        5.0),
-    param = [
-        LogNormal(1.5,
-            0.5),
-        LogNormal(1.2,
-            0.5),
-        LogNormal(3.3,
-            1),
-        LogNormal(1.4,
-            1)],
-    n_leapfrog = 30, progress = true)
-
-sol1flux111_normal = solve(prob1, alg1)
-sol1flux111_normal.estimated_ode_params #|
-sol1flux222_normal = solve(prob1, alg1)
-sol1flux222_normal.estimated_ode_params #|
-sol1flux333_normal = solve(prob1, alg1)
-sol1flux333_normal.estimated_ode_params #| 
-
-# -------------------------------------------------------------------- 
-
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -640.4155412187399
-# Sampling 100%|███████████████████████████████| Time: 0:02:30
-
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -640.4155412187399
-# Sampling 100%|███████████████████████████████| Time: 0:01:54
-
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -640.4155412187399
-# Sampling 100%|███████████████████████████████| Time: 0:01:59
-
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -1198.9147562830894
-# Sampling 100%|███████████████████████████████| Time: 0:02:44
-
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -1198.9147562830894
-# Sampling 100%|███████████████████████████████| Time: 0:02:41
-
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -1198.9147562830894
-# Sampling 100%|███████████████████████████████| Time: 0:02:41
-
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -2473.741390504424
-# Sampling 100%|███████████████████████████████| Time: 0:03:52
-
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -2473.741390504424
-# Sampling 100%|███████████████████████████████| Time: 0:03:49
-
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -2473.741390504424
-# Sampling 100%|███████████████████████████████| Time: 0:03:50
-
-# # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
-# physics Logpdf is : -6.659143464386241e7
-# prior Logpdf is : -150.30074579848434
-# L2lossData Logpdf is : -6.03075717462954e6
-# Sampling 100%|███████████████████████████████| Time: 0:04:54
-
-# physics Logpdf is : -8.70012053004202e8
-# prior Logpdf is : -150.3750892952511
-# L2lossData Logpdf is : -6.967914805207133e6
-# Sampling 100%|███████████████████████████████| Time: 0:05:09
-
-# physics Logpdf is : -5.417241281343099e7
-# prior Logpdf is : -150.52079555737976
-# L2lossData Logpdf is : -4.195953436792884e6
-# Sampling 100%|███████████████████████████████| Time: 0:05:01
-
-# physics Logpdf is : -4.579552981943833e8
-# prior Logpdf is : -150.30491731974283
-# L2lossData Logpdf is : -8.595475827260146e6
-# Sampling 100%|███████████████████████████████| Time: 0:06:08
-
-# physics Logpdf is : -1.989281834955769e7
-# prior Logpdf is : -150.16009042727543
-# L2lossData Logpdf is : -1.121270659669029e7
-# Sampling 100%|███████████████████████████████| Time: 0:05:38
-
-# physics Logpdf is : -8.683829147264534e8
-# prior Logpdf is : -150.37824872259102
-# L2lossData Logpdf is : -1.0887662888035845e7
-# Sampling 100%|███████████████████████████████| Time: 0:05:50
-
-# physics Logpdf is : -3.1944760610332566e8
-# prior Logpdf is : -150.33610348737565
-# L2lossData Logpdf is : -1.215458786744478e7
-# Sampling 100%|███████████████████████████████| Time: 0:10:50
-
-# physics Logpdf is : -3.2884572300341567e6
-# prior Logpdf is : -150.21002268156343
-# L2lossData Logpdf is : -1.102536731511176e7
-# Sampling 100%|███████████████████████████████| Time: 0:09:53
-
-# physics Logpdf is : -5.31293521002414e8
-# prior Logpdf is : -150.20948536040126
-# L2lossData Logpdf is : -1.818717239584132e7
-# Sampling 100%|███████████████████████████████| Time: 0:08:53
-
-# ----------------------------------------------------------
-# Full likelihood no l2 only new L22(NN gradients)
-# 25 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux1_new = solve(prob, alg)
-sol2flux1_new.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} | 6.21662 Particles{Float64, 1}
-sol2flux2_new = solve(prob, alg)
-sol2flux2_new.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1} | 7.14238 Particles{Float64, 1}
-sol2flux3_new = solve(prob, alg)
-sol2flux3_new.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1} | 6.79159 Particles{Float64, 1}
-
-# 50 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux11_new = solve(prob, alg)
-sol2flux11_new.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1} | 5.33467 Particles{Float64, 1}
-sol2flux22_new = solve(prob, alg)
-sol2flux22_new.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1} | 6.52419 Particles{Float64, 1}
-sol2flux33_new = solve(prob, alg)
-sol2flux33_new.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} | 5.36921 Particles{Float64, 1}
-
-# 100 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux111_new = solve(prob, alg)
-sol2flux111_new.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1} | 6.45333 Particles{Float64, 1}
-sol2flux222_new = solve(prob, alg)
-sol2flux222_new.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1} | 4.64417 Particles{Float64, 1}
-sol2flux333_new = solve(prob, alg)
-sol2flux333_new.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1} | 5.88037 Particles{Float64, 1}
-# ---------------------------------------------------------------------------
-
-# ----------------------------------------------------------
-# Full likelihood  l2 + new L22(NN gradients)
-# 25 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux1_new_all = solve(prob, alg)
-sol2flux1_new_all.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} | 6.4358 Particles{Float64, 1}
-sol2flux2_new_all = solve(prob, alg)
-sol2flux2_new_all.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1} | 6.52449 Particles{Float64, 1}
-sol2flux3_new_all = solve(prob, alg)
-sol2flux3_new_all.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1} | 6.34188 Particles{Float64, 1}
-
-# 50 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux11_new_all = solve(prob, alg)
-sol2flux11_new_all.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1} | 6.37889 Particles{Float64, 1}
-sol2flux22_new_all = solve(prob, alg)
-sol2flux22_new_all.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1} | 6.34747 Particles{Float64, 1}
-sol2flux33_new_all = solve(prob, alg)
-sol2flux33_new_all.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} | 6.39699 Particles{Float64, 1}
-
-# 100 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux111_new_all = solve(prob, alg)
-sol2flux111_new_all.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1} | 6.24327 Particles{Float64, 1}
-sol2flux222_new_all = solve(prob, alg)
-sol2flux222_new_all.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1} | 6.23928 Particles{Float64, 1}
-sol2flux333_new_all = solve(prob, alg)
-sol2flux333_new_all.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1} | 6.2145 Particles{Float64, 1}
-
-# ---------------------------------------------------------------------------
-# Full likelihood  l2 + new L22(dataset gradients) lotka volterra
-# 36 points 
-alg1 = NeuralPDE.BNNODE(chainlux,
-    dataset = dataset2_1,
-    draw_samples = 1000,
-    l2std = [
-        0.01,
-        0.01,
-    ],
-    phystd = [
-        0.01,
-        0.01,
-    ],
-    priorsNNw = (0.0,
-        3.0),
-    param = [
-        LogNormal(1.5,
-            0.5),
-        LogNormal(1.2,
-            0.5),
-        LogNormal(3.3,
-            1),
-        LogNormal(1.4,
-            1)],
-    n_leapfrog = 30, progress = true)
-
-sol1flux1_new_all = solve(prob1, alg1)
-sol1flux1_new_all.estimated_ode_params[1]  #|
-sol1flux2_new_all = solve(prob1, alg1)
-sol1flux2_new_all.estimated_ode_params[1] #|
-sol1flux3_new_all = solve(prob1, alg1)
-sol1flux3_new_all.estimated_ode_params[1] #|
-
-# 61 points 
-alg1 = NeuralPDE.BNNODE(chainlux,
-    dataset = dataset2_2,
-    draw_samples = 1000,
-    l2std = [
-        0.01,
-        0.01,
-    ],
-    phystd = [
-        0.01,
-        0.01,
-    ],
-    priorsNNw = (0.0,
-        3.0),
-    param = [
-        LogNormal(1.5,
-            0.5),
-        LogNormal(1.2,
-            0.5),
-        LogNormal(3.3,
-            1),
-        LogNormal(1.4,
-            1)],
-    n_leapfrog = 30, progress = true)
-
-sol1flux11_new_all = solve(prob1, alg1)
-sol1flux11_new_all.estimated_ode_params[1] #|
-sol1flux22_new_all = solve(prob1, alg1)
-sol1flux22_new_all.estimated_ode_params[1] #|
-sol1flux33_new_all = solve(prob1, alg1)
-sol1flux33_new_all.estimated_ode_params[1] #|
-
-# 121 points 
-alg1 = NeuralPDE.BNNODE(chainlux,
-    dataset = dataset2_3,
-    draw_samples = 1000,
-    l2std = [
-        0.01,
-        0.01,
-    ],
-    phystd = [
-        0.01,
-        0.01,
-    ],
-    priorsNNw = (0.0,
-        3.0),
-    param = [
-        LogNormal(1.5,
-            0.5),
-        LogNormal(1.2,
-            0.5),
-        LogNormal(3.3,
-            1),
-        LogNormal(1.4,
-            1)],
-    n_leapfrog = 30, progress = true)
-
-sol1flux111_new_all = solve(prob1, alg1)
-sol1flux111_new_all.estimated_ode_params[1] #|
-sol1flux222_new_all = solve(prob1, alg1)
-sol1flux222_new_all.estimated_ode_params[1] #|
-sol1flux333_new_all = solve(prob1, alg1)
-sol1flux333_new_all.estimated_ode_params[1] #|
-# -------------------------------------------------------------------- 
-
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -640.4155412187399
-# L2loss2 Logpdf is : -757.9047847584478
-# Sampling 100%|███████████████████████████████| Time: 0:02:32
-
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -640.4155412187399
-# L2loss2 Logpdf is : -757.9047847584478
-# Sampling 100%|███████████████████████████████| Time: 0:02:19
-
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -640.4155412187399
-# L2loss2 Logpdf is : -757.9047847584478
-# Sampling 100%|███████████████████████████████| Time: 0:02:31
-
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -1198.9147562830894
-# L2loss2 Logpdf is : -1517.3653615845183
-# Sampling 100%|███████████████████████████████| Time: 0:03:45
-
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -1198.9147562830894
-# L2loss2 Logpdf is : -1517.3653615845183
-# Sampling 100%|███████████████████████████████| Time: 0:03:20
-
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -1198.9147562830894
-# L2loss2 Logpdf is : -1517.3653615845183
-# Sampling 100%|███████████████████████████████| Time: 0:03:20
-
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -2473.741390504424
-# L2loss2 Logpdf is : -3037.8868319811254
-# Sampling 100%|███████████████████████████████| Time: 0:04:57
-
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -2473.741390504424
-# L2loss2 Logpdf is : -3037.8868319811254
-# Sampling 100%|███████████████████████████████| Time: 0:05:26
-
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -2473.741390504424
-# L2loss2 Logpdf is : -3037.8868319811254
-# Sampling 100%|███████████████████████████████| Time: 0:05:01
-
-# ----------------------------------------------------------
-# Full likelihood  l2 + new L22(dataset gradients)
-# 25 points
-# 1*,2*,  
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux1_newdata_all = solve(prob, alg)
-sol2flux1_newdata_all.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} | 5.73072 Particles{Float64, 1}
-sol2flux2_newdata_all = solve(prob, alg)
-sol2flux2_newdata_all.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1} | 5.71597 Particles{Float64, 1}
-sol2flux3_newdata_all = solve(prob, alg)
-sol2flux3_newdata_all.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1} | 5.7313 Particles{Float64, 1}
-
-# 50 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux11_newdata_all = solve(prob, alg)
-sol2flux11_newdata_all.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1} | 6.07153 Particles{Float64, 1}
-sol2flux22_newdata_all = solve(prob, alg)
-sol2flux22_newdata_all.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1} | 6.06623 Particles{Float64, 1}
-sol2flux33_newdata_all = solve(prob, alg)
-sol2flux33_newdata_all.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} | 6.12748 Particles{Float64, 1}
-
-# 100 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux111_newdata_all = solve(prob, alg)
-sol2flux111_newdata_all.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1} | 6.26222 Particles{Float64, 1}
-sol2flux222_newdata_all = solve(prob, alg)
-sol2flux222_newdata_all.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1} | 5.86494 Particles{Float64, 1}
-sol2flux333_newdata_all = solve(prob, alg)
-sol2flux333_newdata_all.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1} |  
-
-# ---------------------------------------------------------------------------
-
-# LOTKA VOLTERRA CASE
-using Plots, StatsPlots
-function lotka_volterra(u, p, t)
-    # Model parameters.
-    α, β, γ, δ = p
-    # Current state.
-    x, y = u
-
-    # Evaluate differential equations.
-    dx = (α - β * y) * x # prey
-    dy = (δ * x - γ) * y # predator
-
-    return [dx, dy]
-end
-
-# initial-value problem.
-u01 = [1.0, 1.0]
-p1 = [1.5, 1.0, 3.0, 1.0]
-tspan1 = (0.0, 6.0)
-prob1 = ODEProblem(lotka_volterra, u01, tspan1, p1)
-
-chainlux = Lux.Chain(Lux.Dense(1, 6, Lux.tanh), Lux.Dense(6, 6, Lux.tanh), Lux.Dense(6, 2))
-
-#testing timepoints must match keyword arg `saveat`` timepoints of solve() call
-t1 = collect(Float64, prob.tspan[1]:(1 / 50.0):prob.tspan[2])
-
-# --------------------------------------------------------------------------
-# original paper implementaion
-# 25 points  
-solution1 = solve(prob1, Tsit5(); saveat = 0.2)
-time1 = solution1.t
-physsol1_1 = solution1.u
-u1 = hcat(solution1.u...)
-x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
-y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
-dataset2_1 = [x1, y1, time1]
-
-plot(time1, u1[1, :])
-plot!(time1, u1[2, :])
-scatter!(dataset2_1[3], dataset2_1[1])
-scatter!(dataset2_1[3], dataset2_1[2])
-
-alg1 = NeuralPDE.BNNODE(chainlux,
-    dataset = dataset2_1,
-    draw_samples = 1000,
-    l2std = [
-        0.01,
-        0.01,
-    ],
-    phystd = [
-        0.01,
-        0.01,
-    ],
-    priorsNNw = (0.0,
-        3.0),
-    param = [
-        LogNormal(1.5,
-            0.5),
-        LogNormal(1.2,
-            0.5),
-        LogNormal(3.3,
-            1),
-        LogNormal(1.4,
-            1)],
-    n_leapfrog = 30, progress = true)
-
-sol1flux1_normal = solve(prob1, alg1)
-sol1flux1_normal.estimated_ode_params[1]  #|
-sol1flux2_normal = solve(prob1, alg1)
-sol1flux2_normal.estimated_ode_params[1] #|
-sol1flux3_normal = solve(prob1, alg1)
-sol1flux3_normal.estimated_ode_params[1] #|
-
-# 50 points
-solution1 = solve(prob1, Tsit5(); saveat = 0.05)
-time1 = solution1.t
-physsol1_1 = solution1.u
-u1 = hcat(solution1.u...)
-x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
-y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
-dataset2_2 = [x1, y1, time1]
-
-alg1 = NeuralPDE.BNNODE(chainlux,
-    dataset = dataset2_2,
-    draw_samples = 1000,
-    l2std = [
-        0.01,
-        0.01,
-    ],
-    phystd = [
-        0.01,
-        0.01,
-    ],
-    priorsNNw = (0.0,
-        3.0),
-    param = [
-        LogNormal(1.5,
-            0.5),
-        LogNormal(1.2,
-            0.5),
-        LogNormal(3.3,
-            1),
-        LogNormal(1.4,
-            1)],
-    n_leapfrog = 30, progress = true)
-
-sol1flux11_normal = solve(prob1, alg1)
-sol1flux11_normal.estimated_ode_params[1] #|
-sol1flux22_normal = solve(prob1, alg1)
-sol1flux22_normal.estimated_ode_params[1] #|
-sol1flux33_normal = solve(prob1, alg1)
-sol1flux33_normal.estimated_ode_params[1] #|
-
-# 100 points
-solution = solve(prob1, Tsit5(); saveat = 0.05)
-time1 = solution1.t
-physsol1_1 = solution1.u
-u1 = hcat(solution1.u...)
-x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
-y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
-dataset2_3 = [x1, y1, time1]
-
-alg1 = NeuralPDE.BNNODE(chainlux,
-    dataset = dataset2_3,
-    draw_samples = 1000,
-    l2std = [
-        0.01,
-        0.01,
-    ],
-    phystd = [
-        0.01,
-        0.01,
-    ],
-    priorsNNw = (0.0,
-        3.0),
-    param = [
-        LogNormal(1.5,
-            0.5),
-        LogNormal(1.2,
-            0.5),
-        LogNormal(3.3,
-            1),
-        LogNormal(1.4,
-            1)],
-    n_leapfrog = 30, progress = true)
-
-sol1flux111_normal = solve(prob1, alg1)
-sol1flux111_normal.estimated_ode_params[1] #|
-sol1flux222_normal = solve(prob1, alg1)
-sol1flux222_normal.estimated_ode_params[1] #|
-sol1flux333_normal = solve(prob1, alg1)
-sol1flux333_normal.estimated_ode_params[1] #|
-
-# --------------------------------------------------------------------
-
-# ----------------------------------------------------------
-# Full likelihood no l2 only new L22(NN gradients)
-# 25 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux1_new = solve(prob, alg)
-sol2flux1_new.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} |
-sol2flux2_new = solve(prob, alg)
-sol2flux2_new.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1}   |
-sol2flux3_new = solve(prob, alg)
-sol2flux3_new.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1}  |
-
-# 50 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux11_new = solve(prob, alg)
-sol2flux11_new.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1}   |
-sol2flux22_new = solve(prob, alg)
-sol2flux22_new.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1}   |
-sol2flux33_new = solve(prob, alg)
-sol2flux33_new.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} |
-
-# 100 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux111_new = solve(prob, alg)
-sol2flux111_new.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1}   |
-sol2flux222_new = solve(prob, alg)
-sol2flux222_new.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1}     |
-sol2flux333_new = solve(prob, alg)
-sol2flux333_new.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1}   |
-# ---------------------------------------------------------------------------
-
-# ----------------------------------------------------------
-# Full likelihood  l2 + new L22(NN gradients)
-# 25 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux1_new_all = solve(prob, alg)
-sol2flux1_new_all.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1}  |
-sol2flux2_new_all = solve(prob, alg)
-sol2flux2_new_all.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1}    |
-sol2flux3_new_all = solve(prob, alg)
-sol2flux3_new_all.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1}   |
-
-# 50 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux11_new_all = solve(prob, alg)
-sol2flux11_new_all.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1}   |
-sol2flux22_new_all = solve(prob, alg)
-sol2flux22_new_all.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1}   |
-sol2flux33_new_all = solve(prob, alg)
-sol2flux33_new_all.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} |
-# 100 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux111_new_all = solve(prob, alg)
-sol2flux111_new_all.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1}  |
-sol2flux222_new_all = solve(prob, alg)
-sol2flux222_new_all.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1}    |
-sol2flux333_new_all = solve(prob, alg)
-sol2flux333_new_all.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1}  |
-
-# ---------------------------------------------------------------------------
-
-# ----------------------------------------------------------
-# Full likelihood  l2 + new L22(dataset gradients)
-# 25 points 
-# *1,*2 vs *2.5
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol1flux1_newdata_all = solve(prob, alg)
-sol1flux1_newdata_all.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} |
-sol1flux2_newdata_all = solve(prob, alg)
-sol1flux2_newdata_all.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1}   |
-sol1flux3_newdata_all = solve(prob, alg)
-sol1flux3_newdata_all.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1}  |
-
-# 50 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol1flux11_newdata_all = solve(prob, alg)
-sol1flux11_newdata_all.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1}    |
-sol1flux22_newdata_all = solve(prob, alg)
-sol1flux22_newdata_all.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1}    |
-sol1flux33_newdata_all = solve(prob, alg)
-sol1flux33_newdata_all.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1}   |
-
-# 100 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol1flux111_newdata_all = solve(prob, alg)
-sol1flux111_newdata_all.estimated_ode_params[1]  #|
-sol1flux222_newdata_all = solve(prob, alg)
-sol1flux222_newdata_all.estimated_ode_params[1]  #|
-sol1flux333_newdata_all = solve(prob, alg)
-sol1flux333_newdata_all.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1}   |
-
-# ------------------------------------------------------------------------------------------------------------------------------
-
-# sol2flux111.estimated_ode_params[1]
-# # mine *5
-# 7.03386Particles{Float64, 1}
-# # normal
-# 6.38951Particles{Float64, 1}
-# 6.67657Particles{Float64, 1}
-# # mine *10
-# 7.53672Particles{Float64, 1}
-# # mine *2
-# 6.29005Particles{Float64, 1}
-# 6.29844Particles{Float64, 1}
-
-# # new mine *2
-# 6.39008Particles{Float64, 1}
-# 6.22071Particles{Float64, 1}
-# 6.15611Particles{Float64, 1}
-
-# # new mine *2 tvdiff(smoothu, 20, 0.035, dx = dx, ε = 1e-2)
-# 6.25549Particles{Float64, 1}
-# ----------------------------------------------------------
-
-# ---------------------------------------------------
-
-function calculate_derivatives1(dataset)
-    x̂, time = dataset
-    num_points = length(x̂)
-    # Initialize an array to store the derivative values.
-    derivatives = similar(x̂)
-
-    for i in 2:(num_points - 1)
-        # Calculate the first-order derivative using central differences.
-        Δt_forward = time[i + 1] - time[i]
-        Δt_backward = time[i] - time[i - 1]
-
-        derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
-
-        derivatives[i] = derivative
-    end
-
-    # Derivatives at the endpoints can be calculated using forward or backward differences.
-    derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
-    derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
-    return derivatives
-end
-
-function calculate_derivatives2(dataset)
-    u = dataset[1]
-    t = dataset[2]
-    # control points
-    n = Int(floor(length(t) / 10))
-    # spline for datasetvalues(solution) 
-    # interp = BSplineApprox(u, t, 4, 10, :Uniform, :Uniform)
-    interp = CubicSpline(u, t)
-    # derrivatives interpolation
-    dx = t[2] - t[1]
-    time = collect(t[1]:dx:t[end])
-    smoothu = [interp(i) for i in time]
-    # derivative of the spline (must match function derivative) 
-    û = tvdiff(smoothu, 20, 0.03, dx = dx, ε = 1)
-    # tvdiff(smoothu, 100, 0.1, dx = dx)
-    # 
-    # 
-    # FDM
-    û1 = diff(u) / dx
-    # dataset[1] and smoothu are almost equal(rounding errors)
-    return û, time, smoothu, û1
-end
-
-# need to do this for all datasets
-c = [linear(prob.u0, p, t) for t in dataset3[2]] #ideal case
-b = calculate_derivatives1(dataset2) #central diffs
-# a = calculate_derivatives2(dataset) #tvdiff(smoothu, 100, 0.1, dx = dx)
-d = calculate_derivatives2(dataset1) #tvdiff(smoothu, 20, 0.035, dx = dx, ε = 1e-2)
-d = calculate_derivatives2(dataset2)
-d = calculate_derivatives2(dataset3)
-mean(abs2.(c .- b))
-mean(abs2.(c .- d[1]))
-loss(model, x, y) = mean(abs2.(model(x) .- y));
-scatter!(prob.u0 .+ (prob.tspan[2] .- dataset3[2]) .* chainflux1(dataset3[2]')')
-loss(chainflux1, dataset3[2]', dataset3[1]')
-# mean(abs2.(c[1:24] .- a[4]))
-plot(c, label = "ideal deriv")
-plot!(b, label = "Centraldiff deriv")
-# plot!(a[1], label = "tvdiff(0.1,def) derivatives") 
-plot!(d[1], label = "tvdiff(0.035,20) derivatives")
-plotly()
-
-# GridTraining , NoiseRobustDiff dataset[2][2]-dataset[2][1] l2std
-# 25 points 
-ta = range(tspan[1], tspan[2], length = 25)
-u = [linear_analytic(u0, p, ti) for ti in ta]
-x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-time = vec(collect(Float64, ta))
-dataset = [x̂, time]
-physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-time1 = collect(tspan[1]:(1 / 50.0):tspan[2])
-physsol = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
-plot(physsol, label = "solution")
-
-# plots from 32(deriv)
-# for d
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-    draw_samples = 2000, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-n2_sol2flux1 = solve(prob, alg)
-n2_sol2flux1.estimated_ode_params[1]
-# with extra likelihood 
-# 10.2011Particles{Float64, 1}
-
-# without extra likelihood 
-# 6.25791Particles{Float64, 1}
-# 6.29539Particles{Float64, 1}
-
-plot!(n2_sol2flux1.ensemblesol[1], label = "tvdiff(0.035,1) derivpar")
-plot(dataset[1])
-plot!(physsol1)
-# for a
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-    draw_samples = 2000, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-n2_sol2flux2 = solve(prob, alg)
-n2_sol2flux2.estimated_ode_params[1]
-# with extra likelihood
-# 8.73602Particles{Float64, 1}
-# without extra likelihood
-
-plot!(n2_sol2flux2.ensemblesol[1],
-    label = "tvdiff(0.1,def) derivatives",
-    legend = :outerbottomleft)
-
-# for b
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-    draw_samples = 2000, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-n2_sol2flux3 = solve(prob, alg)
-n2_sol2flux3.estimated_ode_params[1]
-plot!(n2_sol2flux3.ensemblesol[1], label = "Centraldiff deriv")
-
-# for c
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-    draw_samples = 2000, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-n2_sol2flux4 = solve(prob, alg)
-n2_sol2flux4.estimated_ode_params[1]
-plot!(n2_sol2flux4.ensemblesol[1], label = "ideal deriv")
-
-# 50 points 
-
-ta = range(tspan[1], tspan[2], length = 50)
-u = [linear_analytic(u0, p, ti) for ti in ta]
-x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-time = vec(collect(Float64, ta))
-dataset = [x̂, time]
-physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-n2_sol2flux11 = solve(prob, alg)
-n2_sol2flux11.estimated_ode_params[1]
-
-# 5.90049Particles{Float64, 1}
-# 100 points
-ta = range(tspan[1], tspan[2], length = 100)
-u = [linear_analytic(u0, p, ti) for ti in ta]
-x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-time = vec(collect(Float64, ta))
-dataset = [x̂, time]
-physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-n2_sol2flux111 = solve(prob, alg)
-n2_sol2flux111.estimated_ode_params[1]
-plot!(n2_sol2flux111.ensemblesol[1])
-8.88555Particles{Float64, 1}
-
-# 7.15353Particles{Float64, 1}
-# 6.21059 Particles{Float64, 1}
-# 6.31836Particles{Float64, 1}
-0.1 * p
-# ----------------------------------------------------------
-
-# Gives the linear interpolation value at t=3.5
-
-# # Problem 1 with param esimation
-# # dataset 0-1 2 percent noise
-# p = 6.283185307179586
-# # partial_logdensity
-# 6.3549Particles{Float64, 1}
-# # full log_density
-# 6.34667Particles{Float64, 1}
-
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2lux.estimated_ode_params[1]
-
-# # dataset 0-1 20 percent noise
-# # partial log_density
-# 6.30244Particles{Float64, 1}
-# # full log_density
-# 6.24637Particles{Float64, 1}
-
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# # dataset 0-2 20percent noise
-# # partial log_density
-# 6.24948Particles{Float64, 1}
-# # full log_density
-# 6.26095Particles{Float64, 1}
-
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# linear_analytic = (u0, p, t) -> u0 + sin(p * t) / (p)
-# linear = (u, p, t) -> cos(p * t)
-# tspan = (0.0, 2.0)
-
-# # dataset 0-1 2 percent noise
-# p = 6.283185307179586
-# # partial_logdensity
-# 6.3549Particles{Float64, 1}
-# # full log_density
-# 6.34667Particles{Float64, 1}
-
-# # dataset 0-1 20 percent noise
-# # partial log_density
-# 6.30244Particles{Float64, 1}
-# # full log_density
-# 6.24637Particles{Float64, 1}
-
-# # dataset 0-2 20percent noise
-# # partial log_density
-# 6.24948Particles{Float64, 1}
-# # full log_density
-# 6.26095Particles{Float64, 1}
-
-# # dataset 0-2 20percent noise 50 points(above all are 100 points)
-# # FuLL log_density
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# # partial log_density
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# # i kinda win on 25 points again
-# # dataset 0-2 20percent noise 25 points
-# # FuLL log_density
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# # partial log_density
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# # i win with 25 points
-# # dataset 0-1 20percent noise 25 points
-# # FuLL log_density
-# sol2flux.estimated_ode_params[1]
-# # new
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# # partial log_density
-# sol2flux.estimated_ode_params[1]
-# # New
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# # (9,2.5)(above are (9,0.5))
-# # FuLL log_density
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# # just prev was repeat(just change)
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# # partial log_density
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# # i lose on 0-1,50 points
-# # dataset 0-1 20percent noise 50 points
-# # FuLL log_density
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# # partial log_density
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# # (9,2.5) (above are (9,0.5))
-# # FuLL log_density
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# # partial log_density
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# # ----------------------------------------------------------
-# # Problem 1 with param estimation
-# # physdt=1/20, Full likelihood new 0.5*l2std
-# # 25 points
-# ta = range(tspan[1], tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n05_sol2flux1 = solve(prob, alg)
-# n05_sol2flux1.estimated_ode_params[1] #6.90953 Particles{Float64, 1}
-# n05_sol2flux2 = solve(prob, alg)
-# n05_sol2flux2.estimated_ode_params[1] #6.82374 Particles{Float64, 1}
-# n05_sol2flux3 = solve(prob, alg)
-# n05_sol2flux3.estimated_ode_params[1] #6.84465 Particles{Float64, 1}
-
-# using Plots, StatsPlots
-# plot(n05_sol2flux3.ensemblesol[1])
-# plot!(physsol1)
-# # 50 points
-# ta = range(tspan[1], tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n05_sol2flux11 = solve(prob, alg)
-# n05_sol2flux11.estimated_ode_params[1] #7.0262 Particles{Float64, 1}
-# n05_sol2flux22 = solve(prob, alg)
-# n05_sol2flux22.estimated_ode_params[1] #5.56438 Particles{Float64, 1}
-# n05_sol2flux33 = solve(prob, alg)
-# n05_sol2flux33.estimated_ode_params[1] #7.27189 Particles{Float64, 1}
-
-# # 100 points
-# ta = range(tspan[1], tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n05_sol2flux111 = solve(prob, alg)
-# n05_sol2flux111.estimated_ode_params[1] #6.90549 Particles{Float64, 1}
-# n05_sol2flux222 = solve(prob, alg)
-# n05_sol2flux222.estimated_ode_params[1] #5.42436 Particles{Float64, 1}
-# n05_sol2flux333 = solve(prob, alg)
-# n05_sol2flux333.estimated_ode_params[1] #6.05832 Particles{Float64, 1}
-
-# # ----------------------------------------------------------
-# # physdt=1/20, Full likelihood new 2*l2std
-# # 25 points
-# ta = range(tspan[1], tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n2_sol2flux1 = solve(prob, alg)
-# n2_sol2flux1.estimated_ode_params[1]#6.9087 Particles{Float64, 1}
-# n2_sol2flux2 = solve(prob, alg)
-# n2_sol2flux2.estimated_ode_params[1]#6.86507 Particles{Float64, 1}
-# n2_sol2flux3 = solve(prob, alg)
-# n2_sol2flux3.estimated_ode_params[1]#6.59206 Particles{Float64, 1}
-
-# # 50 points
-# ta = range(tspan[1], tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n2_sol2flux11 = solve(prob, alg)
-# n2_sol2flux11.estimated_ode_params[1]#7.3715 Particles{Float64, 1}
-# n2_sol2flux22 = solve(prob, alg)
-# n2_sol2flux22.estimated_ode_params[1]#9.84477 Particles{Float64, 1}
-# n2_sol2flux33 = solve(prob, alg)
-# n2_sol2flux33.estimated_ode_params[1]#6.87107 Particles{Float64, 1}
-
-# # 100 points
-# ta = range(tspan[1], tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n2_sol2flux111 = solve(prob, alg)
-# n2_sol2flux111.estimated_ode_params[1]#6.60739 Particles{Float64, 1}
-# n2_sol2flux222 = solve(prob, alg)
-# n2_sol2flux222.estimated_ode_params[1]#7.05923 Particles{Float64, 1}
-# n2_sol2flux333 = solve(prob, alg)
-# n2_sol2flux333.estimated_ode_params[1]#6.5017 Particles{Float64, 1}
-
-# # ----------------------------------------------------------
-
-# # ----------------------------------------------------------
-# # physdt=1/20, Full likelihood new all 2*l2std
-# # 25 points
-# ta = range(tspan[1], tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n2all5sol2flux1 = solve(prob, alg)
-# n2all5sol2flux1.estimated_ode_params[1]#11.3659 Particles{Float64, 1}
-# n2all5sol2flux2 = solve(prob, alg)
-# n2all5sol2flux2.estimated_ode_params[1]#6.65634 Particles{Float64, 1}
-# n2all5sol2flux3 = solve(prob, alg)
-# n2all5sol2flux3.estimated_ode_params[1]#6.61905 Particles{Float64, 1}
-
-# # 50 points
-# ta = range(tspan[1], tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n2all5sol2flux11 = solve(prob, alg)
-# n2all5sol2flux11.estimated_ode_params[1]#6.27555 Particles{Float64, 1}
-# n2all5sol2flux22 = solve(prob, alg)
-# n2all5sol2flux22.estimated_ode_params[1]#6.24352 Particles{Float64, 1}
-# n2all5sol2flux33 = solve(prob, alg)
-# n2all5sol2flux33.estimated_ode_params[1]#6.33723 Particles{Float64, 1}
-
-# # 100 points
-# ta = range(tspan[1], tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n2all5sol2flux111 = solve(prob, alg)
-# n2all5sol2flux111.estimated_ode_params[1] #5.95535 Particles{Float64, 1}
-# n2all5sol2flux222 = solve(prob, alg)
-# n2all5sol2flux222.estimated_ode_params[1] #5.98301 Particles{Float64, 1}
-# n2all5sol2flux333 = solve(prob, alg)
-# n2all5sol2flux333.estimated_ode_params[1] #5.9081 Particles{Float64, 1}
-
-# # ----------------------------------------------------------
-# # physdt=1/20, Full likelihood new all (l2+l22)
-# # 25 points
-# ta = range(tspan[1], tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# nall5sol2flux1 = solve(prob, alg)
-# nall5sol2flux1.estimated_ode_params[1]#6.54705 Particles{Float64, 1}
-# nall5sol2flux2 = solve(prob, alg)
-# nall5sol2flux2.estimated_ode_params[1]#6.6967 Particles{Float64, 1}
-# nall5sol2flux3 = solve(prob, alg)
-# nall5sol2flux3.estimated_ode_params[1]#6.47173 Particles{Float64, 1}
-
-# # 50 points
-# ta = range(tspan[1], tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# nall5sol2flux11 = solve(prob, alg)
-# nall5sol2flux11.estimated_ode_params[1]#6.2113 Particles{Float64, 1}
-# nall5sol2flux22 = solve(prob, alg)
-# nall5sol2flux22.estimated_ode_params[1]#6.10675 Particles{Float64, 1}
-# nall5sol2flux33 = solve(prob, alg)
-# nall5sol2flux33.estimated_ode_params[1]#6.11541 Particles{Float64, 1}
-
-# # 100 points
-# ta = range(tspan[1], tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# nall5sol2flux111 = solve(prob, alg)
-# nall5sol2flux111.estimated_ode_params[1]#6.35224 Particles{Float64, 1}
-# nall5sol2flux222 = solve(prob, alg)
-# nall5sol2flux222.estimated_ode_params[1]#6.40542 Particles{Float64, 1}
-# nall5sol2flux333 = solve(prob, alg)
-# nall5sol2flux333.estimated_ode_params[1]#6.44206 Particles{Float64, 1}
-
-# # ----------------------------------------------------------
-# # physdt=1/20, Full likelihood new 5* (new only l22 mod)
-# # 25 points
-# ta = range(tspan[1], tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n5sol2flux1 = solve(prob, alg)
-# n5sol2flux1.estimated_ode_params[1]#7.05077 Particles{Float64, 1}
-# n5sol2flux2 = solve(prob, alg)
-# n5sol2flux2.estimated_ode_params[1]#7.07303 Particles{Float64, 1}
-# n5sol2flux3 = solve(prob, alg)
-# n5sol2flux3.estimated_ode_params[1]#5.10622 Particles{Float64, 1}
-
-# # 50 points
-# ta = range(tspan[1], tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n5sol2flux11 = solve(prob, alg)
-# n5sol2flux11.estimated_ode_params[1]#7.39852 Particles{Float64, 1}
-# n5sol2flux22 = solve(prob, alg)
-# n5sol2flux22.estimated_ode_params[1]#7.30319 Particles{Float64, 1}
-# n5sol2flux33 = solve(prob, alg)
-# n5sol2flux33.estimated_ode_params[1]#6.73722 Particles{Float64, 1}
-
-# # 100 points
-# ta = range(tspan[1], tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n5sol2flux111 = solve(prob, alg)
-# n5sol2flux111.estimated_ode_params[1]#7.15996 Particles{Float64, 1}
-# n5sol2flux222 = solve(prob, alg)
-# n5sol2flux222.estimated_ode_params[1]#7.02949 Particles{Float64, 1}
-# n5sol2flux333 = solve(prob, alg)
-# n5sol2flux333.estimated_ode_params[1]#6.9393 Particles{Float64, 1}
-
-# # ----------------------------------------------------------
-# # physdt=1/20, Full likelihood new
-# # 25 points
-# ta = range(tspan[1], tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# nsol2flux1 = solve(prob, alg)
-# nsol2flux1.estimated_ode_params[1] #5.82707 Particles{Float64, 1}
-# nsol2flux2 = solve(prob, alg)
-# nsol2flux2.estimated_ode_params[1] #4.81534 Particles{Float64, 1}
-# nsol2flux3 = solve(prob, alg)
-# nsol2flux3.estimated_ode_params[1] #5.52965 Particles{Float64, 1}
-
-# # 50 points
-# ta = range(tspan[1], tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# nsol2flux11 = solve(prob, alg)
-# nsol2flux11.estimated_ode_params[1] #7.04027 Particles{Float64, 1}
-# nsol2flux22 = solve(prob, alg)
-# nsol2flux22.estimated_ode_params[1] #7.17588 Particles{Float64, 1}
-# nsol2flux33 = solve(prob, alg)
-# nsol2flux33.estimated_ode_params[1] #6.94495 Particles{Float64, 1}
-
-# # 100 points
-# ta = range(tspan[1], tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# nsol2flux111 = solve(prob, alg)
-# nsol2flux111.estimated_ode_params[1] #6.06608 Particles{Float64, 1}
-# nsol2flux222 = solve(prob, alg)
-# nsol2flux222.estimated_ode_params[1] #6.84726 Particles{Float64, 1}
-# nsol2flux333 = solve(prob, alg)
-# nsol2flux333.estimated_ode_params[1] #6.83463 Particles{Float64, 1}
-
-# # ----------------------------------------------------------
-
-# # ----------------------------------------------------------
-# # physdt=1/20, Full likelihood
-# # 25 points
-# ta = range(tspan[1], tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux1 = solve(prob, alg)
-# sol2flux1.estimated_ode_params[1] #6.71397 Particles{Float64, 1} 6.37604 Particles{Float64, 1}
-# sol2flux2 = solve(prob, alg)
-# sol2flux2.estimated_ode_params[1] #6.73509 Particles{Float64, 1} 6.21692 Particles{Float64, 1}
-# sol2flux3 = solve(prob, alg)
-# sol2flux3.estimated_ode_params[1] #6.65453 Particles{Float64, 1} 6.23153 Particles{Float64, 1}
-
-# # 50 points
-# ta = range(tspan[1], tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux11 = solve(prob, alg)
-# sol2flux11.estimated_ode_params[1] #6.23443 Particles{Float64, 1} 6.30635 Particles{Float64, 1}
-# sol2flux22 = solve(prob, alg)
-# sol2flux22.estimated_ode_params[1] #6.18879 Particles{Float64, 1} 6.30099 Particles{Float64, 1}
-# sol2flux33 = solve(prob, alg)
-# sol2flux33.estimated_ode_params[1] #6.22773 Particles{Float64, 1} 6.30671 Particles{Float64, 1}
-
-# # 100 points
-# ta = range(tspan[1], tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux111 = solve(prob, alg)
-# sol2flux111.estimated_ode_params[1] #6.15832 Particles{Float64, 1} 6.35453 Particles{Float64, 1}
-# sol2flux222 = solve(prob, alg)
-# sol2flux222.estimated_ode_params[1] #6.16968 Particles{Float64, 1}6.31125 Particles{Float64, 1}
-# sol2flux333 = solve(prob, alg)
-# sol2flux333.estimated_ode_params[1] #6.12466 Particles{Float64, 1} 6.26514 Particles{Float64, 1}
-
-# # ----------------------------------------------------------
-
-# # ----------------------------------------------------------
-# # physdt=1/20, partial likelihood
-# # 25 points
-# ta = range(tspan[1], tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux1_p = solve(prob, alg)
-# sol2flux1_p.estimated_ode_params[1] #5.74065 Particles{Float64, 1} #6.83683 Particles{Float64, 1}
-# sol2flux2_p = solve(prob, alg)
-# sol2flux2_p.estimated_ode_params[1] #9.82504 Particles{Float64, 1} #6.14568 Particles{Float64, 1}
-# sol2flux3_p = solve(prob, alg)
-# sol2flux3_p.estimated_ode_params[1] #5.75075 Particles{Float64, 1} #6.08579 Particles{Float64, 1}
-
-# # 50 points
-# ta = range(tspan[1], tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux11_p = solve(prob, alg)
-# sol2flux11_p.estimated_ode_params[1] #6.19414 Particles{Float64, 1} #6.04621 Particles{Float64, 1}
-# sol2flux22_p = solve(prob, alg)
-# sol2flux22_p.estimated_ode_params[1] #6.15227 Particles{Float64, 1} #6.29086 Particles{Float64, 1}
-# sol2flux33_p = solve(prob, alg)
-# sol2flux33_p.estimated_ode_params[1] #6.19048 Particles{Float64, 1} #6.12516 Particles{Float64, 1}
-
-# # 100 points
-# ta = range(tspan[1], tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux111_p = solve(prob, alg)
-# sol2flux111_p.estimated_ode_params[1] #6.51608 Particles{Float64, 1}# 6.42945Particles{Float64, 1}
-# sol2flux222_p = solve(prob, alg)
-# sol2flux222_p.estimated_ode_params[1] #6.4875 Particles{Float64, 1} # 6.44524Particles{Float64, 1}
-# sol2flux333_p = solve(prob, alg)
-# sol2flux333_p.estimated_ode_params[1] #6.51679 Particles{Float64, 1}# 6.43152Particles{Float64, 1}
-
-# # ---------------------------------------------------
-
-# # ----------------------------------------------------------
-# # physdt=1/20, Full likelihood, dataset(1.0-2.0)
-# # 25 points
-# ta = range(1.0, tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1flux1 = solve(prob, alg)
-# sol1flux1.estimated_ode_params[1] #6.35164 Particles{Float64, 1}
-# sol1flux2 = solve(prob, alg)
-# sol1flux2.estimated_ode_params[1] #6.30919 Particles{Float64, 1}
-# sol1flux3 = solve(prob, alg)
-# sol1flux3.estimated_ode_params[1] #6.33554 Particles{Float64, 1}
-
-# # 50 points
-# ta = range(1.0, tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1flux11 = solve(prob, alg)
-# sol1flux11.estimated_ode_params[1] #6.39769 Particles{Float64, 1}
-# sol1flux22 = solve(prob, alg)
-# sol1flux22.estimated_ode_params[1] #6.43924 Particles{Float64, 1}
-# sol1flux33 = solve(prob, alg)
-# sol1flux33.estimated_ode_params[1] #6.4697 Particles{Float64, 1}
-
-# # 100 points
-# ta = range(1.0, tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1flux111 = solve(prob, alg)
-# sol1flux111.estimated_ode_params[1] #6.27812 Particles{Float64, 1}
-# sol1flux222 = solve(prob, alg)
-# sol1flux222.estimated_ode_params[1] #6.19278 Particles{Float64, 1}
-# sol1flux333 = solve(prob, alg)
-# sol1flux333.estimated_ode_params[1] # 9.68244Particles{Float64, 1} (first try) # 6.23969 Particles{Float64, 1}(second try)
-
-# # ----------------------------------------------------------
-
-# # ----------------------------------------------------------
-# # physdt=1/20, partial likelihood, dataset(1.0-2.0)
-# # 25 points
-# ta = range(1.0, tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1flux1_p = solve(prob, alg)
-# sol1flux1_p.estimated_ode_params[1]#6.36269 Particles{Float64, 1}
-
-# sol1flux2_p = solve(prob, alg)
-# sol1flux2_p.estimated_ode_params[1]#6.34685 Particles{Float64, 1}
-
-# sol1flux3_p = solve(prob, alg)
-# sol1flux3_p.estimated_ode_params[1]#6.31421 Particles{Float64, 1}
-
-# # 50 points
-# ta = range(1.0, tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1flux11_p = solve(prob, alg)
-# sol1flux11_p.estimated_ode_params[1] #6.15725 Particles{Float64, 1}
-
-# sol1flux22_p = solve(prob, alg)
-# sol1flux22_p.estimated_ode_params[1] #6.18145 Particles{Float64, 1}
-
-# sol1flux33_p = solve(prob, alg)
-# sol1flux33_p.estimated_ode_params[1] #6.21905 Particles{Float64, 1}
-
-# # 100 points
-# ta = range(1.0, tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1flux111_p = solve(prob, alg)
-# sol1flux111_p.estimated_ode_params[1]#6.13481 Particles{Float64, 1}
-
-# sol1flux222_p = solve(prob, alg)
-# sol1flux222_p.estimated_ode_params[1]#9.68555 Particles{Float64, 1}
-
-# sol1flux333_p = solve(prob, alg)
-# sol1flux333_p.estimated_ode_params[1]#6.1477 Particles{Float64, 1}
-
-# # -----------------------------------------------------------
-
-# # ----------------------------------------------------------
-# # physdt=1/20, partial likelihood, dataset(1-2), again but different density
-# # 12 points
-# ta = range(1.0, tspan[2], length = 12)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol3flux1_p = solve(prob, alg)
-# sol3flux1_p.estimated_ode_params[1]#6.50048 Particles{Float64, 1}
-# sol3flux2_p = solve(prob, alg)
-# sol3flux2_p.estimated_ode_params[1]#6.57597 Particles{Float64, 1}
-# sol3flux3_p = solve(prob, alg)
-# sol3flux3_p.estimated_ode_params[1]#6.24487 Particles{Float64, 1}
-
-# # 25 points
-# ta = range(1.0, tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol3flux11_p = solve(prob, alg)
-# sol3flux11_p.estimated_ode_params[1]#6.53093 Particles{Float64, 1}
-
-# sol3flux22_p = solve(prob, alg)
-# sol3flux22_p.estimated_ode_params[1]#6.32744 Particles{Float64, 1}
-
-# sol3flux33_p = solve(prob, alg)
-# sol3flux33_p.estimated_ode_params[1]#6.49175 Particles{Float64, 1}
-
-# # 50 points
-# ta = range(1.0, tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol3flux111_p = solve(prob, alg)
-# sol3flux111_p.estimated_ode_params[1]#6.4455 Particles{Float64, 1}
-# sol3flux222_p = solve(prob, alg)
-# sol3flux222_p.estimated_ode_params[1]#6.40736 Particles{Float64, 1}
-# sol3flux333_p = solve(prob, alg)
-# sol3flux333_p.estimated_ode_params[1]#6.46214 Particles{Float64, 1}
-
-# # ---------------------------------------------------
-
-# # ----------------------------------------------------------
-# # physdt=1/20, partial likelihood, dataset(0-1)
-# # 25 points
-# ta = range(tspan[1], 1.0, length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol0flux1_p = solve(prob, alg)
-# sol0flux1_p.estimated_ode_params[1]#7.12625 Particles{Float64, 1}
-# sol0flux2_p = solve(prob, alg)
-# sol0flux2_p.estimated_ode_params[1]#8.40948 Particles{Float64, 1}
-# sol0flux3_p = solve(prob, alg)
-# sol0flux3_p.estimated_ode_params[1]#7.18768 Particles{Float64, 1}
-
-# # 50 points
-# ta = range(tspan[1], 1.0, length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol0flux11_p = solve(prob, alg)
-# sol0flux11_p.estimated_ode_params[1]#6.23707 Particles{Float64, 1}
-# sol0flux22_p = solve(prob, alg)
-# sol0flux22_p.estimated_ode_params[1]#6.09728 Particles{Float64, 1}
-# sol0flux33_p = solve(prob, alg)
-# sol0flux33_p.estimated_ode_params[1]#6.12971 Particles{Float64, 1}
-
-# # 100 points
-# ta = range(tspan[1], 1.0, length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol0flux111_p = solve(prob, alg)
-# sol0flux111_p.estimated_ode_params[1]#5.99039 Particles{Float64, 1}
-# sol0flux222_p = solve(prob, alg)
-# sol0flux222_p.estimated_ode_params[1]#5.89609 Particles{Float64, 1}
-# sol0flux333_p = solve(prob, alg)
-# sol0flux333_p.estimated_ode_params[1]#5.91923 Particles{Float64, 1}
-
-# # ---------------------------------------------------
-
-# # ----------------------------------------------------------
-# # physdt=1/20, Full likelihood, dataset(1.0-2.0), Normal(12,5) distri prior
-# # 25 points
-# ta = range(1.0, tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 6.0),
-#     param = [Normal(12, 5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1f1 = solve(prob, alg)
-# sol1f1.estimated_ode_params[1]
-# # 10.9818Particles{Float64, 1}
-# sol1f2 = solve(prob, alg)
-# sol1f2.estimated_ode_params[1]
-# # sol1f3 = solve(prob, alg)
-# # sol1f3.estimated_ode_params[1]
-
-# # 50 points
-# ta = range(1.0, tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 6.0),
-#     param = [Normal(12, 5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1f11 = solve(prob, alg)
-# sol1f11.estimated_ode_params[1]
-# sol1f22 = solve(prob, alg)
-# sol1f22.estimated_ode_params[1]
-# # sol1f33 = solve(prob, alg)
-# # sol1f33.estimated_ode_params[1]
-
-# # 100 points
-# ta = range(1.0, tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 6.0),
-#     param = [Normal(12, 5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1f111 = solve(prob, alg)
-# sol1f111.estimated_ode_params[1]
-# sol1f222 = solve(prob, alg)
-# sol1f222.estimated_ode_params[1]
-# # sol1f333 = solve(prob, alg)
-# # sol1f333.estimated_ode_params[1]
-
-# # ----------------------------------------------------------
-
-# # ----------------------------------------------------------
-# # physdt=1/20, partial likelihood, dataset(1.0-2.0), Normal(12,5) distri prior
-# # 25 points
-# ta = range(1.0, tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [Normal(12, 5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1f1_p = solve(prob, alg)
-# sol1f1_p.estimated_ode_params[1]
-# sol1f2_p = solve(prob, alg)
-# sol1f2_p.estimated_ode_params[1]
-# sol1f3_p = solve(prob, alg)
-# sol1f3_p.estimated_ode_params[1]
-
-# # 50 points
-# ta = range(1.0, tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [Normal(12, 5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1f11_p = solve(prob, alg)
-# sol1f11_p.estimated_ode_params[1]
-# sol1f22_p = solve(prob, alg)
-# sol1f22_p.estimated_ode_params[1]
-# sol1f33_p = solve(prob, alg)
-# sol1f33_p.estimated_ode_params[1]
-
-# # 100 points
-# ta = range(1.0, tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [Normal(12, 5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1f111_p = solve(prob, alg)
-# sol1f111_p.estimated_ode_params[1]
-# sol1f222_p = solve(prob, alg)
-# sol1f222_p.estimated_ode_params[1]
-# sol1f333_p = solve(prob, alg)
-# sol1f333_p.estimated_ode_params[1]
-
-# # ----------------------------------------------------------
-
-# plot!(title = "9,2.5 50 training 2>full,1>partial")
-
-# p
-# param1
-# # (lux chain)
-# @prob mean(abs.(physsol2 .- sol3lux_pestim.ensemblesol[1])) < 8e-2
-
-# # estimated parameters(lux chain)
-# param1 = sol3lux_pestim.estimated_ode_params[1]
-# @test abs(param1 - p) < abs(0.35 * p)
-
-# p
-# param1
-
-# # # my suggested Loss likelihood part
-# # #  + L2loss2(Tar, θ)
-# # # My suggested extra loss function
-# # function L2loss2(Tar::LogTargetDensity, θ)
-# #     f = Tar.prob.f
-
-# #     # parameter estimation chosen or not
-# #     if Tar.extraparams > 0
-# #         dataset = Tar.dataset
-
-# #         # Timepoints to enforce Physics
-# #         dataset = Array(reduce(hcat, dataset)')
-# #         t = dataset[end, :]
-# #         û = dataset[1:(end - 1), :]
-
-# #         ode_params = Tar.extraparams == 1 ?
-# #                      θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
-# #                      θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
-
-# #         if length(û[:, 1]) == 1
-# #             physsol = [f(û[:, i][1],
-# #                 ode_params,
-# #                 t[i])
-# #                        for i in 1:length(û[1, :])]
-# #         else
-# #             physsol = [f(û[:, i],
-# #                 ode_params,
-# #                 t[i])
-# #                        for i in 1:length(û[1, :])]
-# #         end
-# #         #form of NN output matrix output dim x n
-# #         deri_physsol = reduce(hcat, physsol)
-
-# #         #   OG deriv(basically gradient matching in case of an ODEFunction)
-# #         # in case of PDE or general ODE we would want to reduce residue of f(du,u,p,t)
-# #         # if length(û[:, 1]) == 1
-# #         #     deri_sol = [f(û[:, i][1],
-# #         #         Tar.prob.p,
-# #         #         t[i])
-# #         #                 for i in 1:length(û[1, :])]
-# #         # else
-# #         #     deri_sol = [f(û[:, i],
-# #         #         Tar.prob.p,
-# #         #         t[i])
-# #         #                 for i in 1:length(û[1, :])]
-# #         # end
-# #         # deri_sol = reduce(hcat, deri_sol)
-# #         derivatives = calculate_derivatives(Tar.dataset)
-# #         deri_sol = reduce(hcat, derivatives)
-
-# #         physlogprob = 0
-# #         for i in 1:length(Tar.prob.u0)
-# #             # can add phystd[i] for u[i]
-# #             physlogprob += logpdf(MvNormal(deri_physsol[i, :],
-# #                     LinearAlgebra.Diagonal(map(abs2,
-# #                         Tar.l2std[i] .*
-# #                         ones(length(deri_sol[i, :]))))),
-# #                 deri_sol[i, :])
-# #         end
-# #         return physlogprob
-# #     else
-# #         return 0
-# #     end
-# # end
-
-# # function calculate_derivatives(dataset)
-# #     x̂, time = dataset
-# #     num_points = length(x̂)
-
-# #     # Initialize an array to store the derivative values.
-# #     derivatives = similar(x̂)
-
-# #     for i in 2:(num_points - 1)
-# #         # Calculate the first-order derivative using central differences.
-# #         Δt_forward = time[i + 1] - time[i]
-# #         Δt_backward = time[i] - time[i - 1]
-
-# #         derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
-
-# #         derivatives[i] = derivative
-# #     end
-
-# #     # Derivatives at the endpoints can be calculated using forward or backward differences.
-# #     derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
-# #     derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
-
-# #     return derivatives
-# # end
-
-# size(dataset[1])
-# # Problem 1 with param estimation(flux,lux)
-# # Normal
-# # 6.20311 Particles{Float64, 1},6.21746Particles{Float64, 1}
-# # better
-# # 6.29093Particles{Float64, 1}, 6.27925Particles{Float64, 1}
-# # Non ideal case
-# # 6.14861Particles{Float64, 1}, 
-# sol2flux.estimated_ode_params
-# sol2lux.estimated_ode_params[1]
-# p
-# size(sol3flux_pestim.ensemblesol[2])
-# plott = sol3flux_pestim.ensemblesol[1]
-# using StatsPlots
-# plotly()
-# plot(t, sol3flux_pestim.ensemblesol[1])
-
-# function calculate_derivatives(dataset)
-#     x̂, time = dataset
-#     num_points = length(x̂)
-
-#     # Initialize an array to store the derivative values.
-#     derivatives = similar(x̂)
-
-#     for i in 2:(num_points - 1)
-#         # Calculate the first-order derivative using central differences.
-#         Δt_forward = time[i + 1] - time[i]
-#         Δt_backward = time[i] - time[i - 1]
-
-#         derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
-
-#         derivatives[i] = derivative
-#     end
-
-#     # Derivatives at the endpoints can be calculated using forward or backward differences.
-#     derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
-#     derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
-
-#     return derivatives
-# end
-
-# # Example usage:
-# # dataset = [x̂, time]
-# derivatives = calculate_derivatives(dataset)
-# dataset[1]
-# # Access derivative values at specific time points as needed.
-
-# # # 9,0.5
-# # 0.09894916260292887
-# # 0.09870335436072103
-# # 0.08398556878067913
-# # 0.10109070099105527
-# # 0.09122683737517055
-# # 0.08614958011892977
-# # mean(abs.(x̂ .- meanscurve1)) #0.017112298305523976
-# # mean(abs.(physsol1 .- meanscurve1)) #0.004038636894341354
-# # # 9,4(little worse)
-# # mean(abs.(x̂ .- meanscurve1))#0.01800876370000113
-# # mean(abs.(physsol1 .- meanscurve1))#0.007285681280600875
-# # # 30,30
-# # mean(abs.(x̂ .- meanscurve1)) #0.10599926120358046
-# # mean(abs.(physsol1 .- meanscurve1)) #0.10375554193397989
-# # # 30,0.5
-# # mean(abs.(x̂ .- meanscurve1)) #0.10160824458252521
-# # mean(abs.(physsol1 .- meanscurve1)) #0.09999942538357891
-
-# # # ------------------------------------------------normale
-# # # 9,0.5
-# # mean(abs.(x̂ .- meanscurve1)) #0.0333356493928835
-# # mean(abs.(physsol1 .- meanscurve1)) #0.02721733876400459
-# # # 9,4(little worse)
-# # mean(abs.(x̂ .- meanscurve1)) #0.020734206709433347
-# # mean(abs.(physsol1 .- meanscurve1)) #0.012502850740700212
-# # # 30,30
-# # mean(abs.(x̂ .- meanscurve1)) #0.10615859683094729
-# # mean(abs.(physsol1 .- meanscurve1)) #0.10508141153722575
-# # # 30,0.5
-# # mean(abs.(x̂ .- meanscurve1)) #0.10833514946031565
-# # mean(abs.(physsol1 .- meanscurve1)) #0.10668470203219232
-
-# # # 9,0.5
-# # 10.158108285475553
-# # 10.207234384538026
-# # 10.215000657664852
-# # 10.213817644016174
-# # 13.380030074088719
-# # 13.348906350967326
-
-# # 6.952731422892041
-
-# # # All losses
-# # 10.161478523326277
-# # # L2 losses 1
-# # 9.33312996960278
-# # # L2 losses 2
-# # 10.217417241370631
-
-# # mean([fhsamples1[i][26] for i in 500:1000]) #6.245045767509431
-# # p #6.283185307179586
-# # # 9,4
-# # mean([fhsamples1[i][23] for i in 500:1000]) #6.212522300650451
-# # # 30,30
-# # mean([fhsamples1[i][23] for i in 500:1000]) #35.328636809737695
-# # # 30,0.5
-# # mean([fhsamples1[i][23] for i in 500:1000]) #35.232963812125654
-
-# # # ---------------------------------------normale
-# # # 9,0.5
-# # mean([fhsamples1[i][23] for i in 500:1000]) #6.547771572198114
-# # p #6.283185307179586
-# # # 9,4
-# # mean([fhsamples1[i][23] for i in 500:1000]) #6.158906185002702
-# # # 30,30
-# # mean([fhsamples1[i][23] for i in 500:1000]) #29.210400972620185
-# # # 30,0.5
-# # mean([fhsamples1[i][23] for i in 500:1000]) #29.153845019454522
-
-# # # ----------------more dataset normale -----------------------------
-# # # 9,0.5
-# # mean([fhsamples1[i][23] for i in 500:1000]) #6.271141178216537
-# # p #6.283185307179586
-# # # 9,4
-# # mean([fhsamples1[i][23] for i in 500:1000]) #6.241144692919369
-# # # 30,30
-# # mean([fhsamples1[i][23] for i in 500:1000]) #29.124480447973127
-# # # 30,0.5
-# # mean([fhsamples1[i][23] for i in 500:1000]) #29.07838011629903
-
-# # # 9,0.5
-# # mean(abs.(x̂ .- meanscurve1)) #0.016551602015599295
-# # mean(abs.(physsol1 .- meanscurve1)) #0.0021488618484224245
-# # # 9,4(little worse)
-# # mean(abs.(x̂ .- meanscurve1)) #0.017022725082640747
-# # mean(abs.(physsol1 .- meanscurve1)) #0.004339761917100232
-# # # 30,30
-# # mean(abs.(x̂ .- meanscurve1)) #0.09668785317864312
-# # mean(abs.(physsol1 .- meanscurve1)) #0.09430712337543362
-# # # 30,0.5
-# # mean(abs.(x̂ .- meanscurve1)) #0.09958118358974392
-# # mean(abs.(physsol1 .- meanscurve1)) #0.09717454226368502
-
-# # # ----------------more dataset special -----------------------------
-# # # 9,0.5
-# # mean([fhsamples1[i][23] for i in 500:1000]) #6.284355334485365
-# # p #6.283185307179586
-# # # 9,4
-# # mean([fhsamples1[i][23] for i in 500:1000]) #6.259238106698602
-# # # 30,30
-# # mean([fhsamples1[i][23] for i in 500:1000]) #29.139808934336987
-# # # 30,0.5
-# # mean([fhsamples1[i][23] for i in 500:1000]) #29.03921327641226
-
-# # # 9,0.5
-# # mean(abs.(x̂ .- meanscurve1)) #0.016627231605546876
-# # mean(abs.(physsol1 .- meanscurve1)) #0.0020311429130039564
-# # # 9,4(little worse)
-# # mean(abs.(x̂ .- meanscurve1)) #0.016650324577507352
-# # mean(abs.(physsol1 .- meanscurve1)) #0.0027537543411154677
-# # # 30,30
-# # mean(abs.(x̂ .- meanscurve1)) #0.09713187937270151
-# # mean(abs.(physsol1 .- meanscurve1)) #0.09317278450371556
-# # # 30,0.5
-# # mean(abs.(x̂ .- meanscurve1)) #0.09550234866855814
-# # mean(abs.(physsol1 .- meanscurve1)) #0.09317278450371556
-
-# # using Plots, StatsPlots
-# # plotly()
-
-# # ---------------------------------------------------------
-# # # # Distribution abstract in wrapper, dataset Float64
-# # # 268.651 s (206393690 allocations: 388.71 GiB)
-# # # 318.170551 seconds (206.29 M allocations: 388.453 GiB, 20.83% gc time)
-
-# # # # Above with dataset Real subtype
-# # # 326.201 s (206327409 allocations: 388.42 GiB)
-# # # 363.189370 seconds (206.25 M allocations: 387.975 GiB, 15.77% gc time)
-# # # 306.171 s (206321277 allocations: 388.55 GiB)
-# # # 356.180699 seconds (206.43 M allocations: 388.361 GiB, 13.77% gc time)
-
-# # # # Above with dataset AbstractFloat subtype
-# # # 290.751187 seconds (205.94 M allocations: 387.955 GiB, 12.92% gc time)
-# # # 296.319815 seconds (206.38 M allocations: 388.730 GiB, 12.69% gc time)
-
-# # # # ODEProblem float64 dtaset and vector distri inside
-# # #   273.169 s (206128318 allocations: 388.40 GiB)
-# # #   274.059531 seconds (205.91 M allocations: 387.953 GiB, 12.77% gc time)
-
-# # # #   Dataset float64 inside and vector distri outsude
-# # #   333.603 s (206251143 allocations: 388.41 GiB)
-# # # 373.377222 seconds (206.11 M allocations: 387.968 GiB, 13.25% gc time)
-# # #   359.745 s (206348301 allocations: 388.41 GiB)
-# # # 357.813114 seconds (206.31 M allocations: 388.354 GiB, 13.54% gc time)
-
-# # # # Dataset float64 inside and vector distri inside
-# # #   326.437 s (206253571 allocations: 388.41 GiB)
-# # #   290.334083 seconds (205.92 M allocations: 387.954 GiB, 13.82% gc time)
-
-# # # # current setting
-# # # 451.304 s (206476927 allocations: 388.43 GiB)
-# # # 384.532732 seconds (206.22 M allocations: 387.976 GiB, 13.17% gc time)
-# # # 310.223 s (206332558 allocations: 388.63 GiB)
-# # # 344.243889 seconds (206.34 M allocations: 388.409 GiB, 13.84% gc time)
-# # # 357.457737 seconds (206.66 M allocations: 389.064 GiB, 18.16% gc time)
-
-# # # # shit setup
-# # #   325.595 s (206283732 allocations: 388.41 GiB)
-# # # 334.248753 seconds (206.06 M allocations: 387.964 GiB, 12.60% gc time)
-# # #   326.011 s (206370857 allocations: 388.56 GiB)
-# # # 327.203339 seconds (206.29 M allocations: 388.405 GiB, 12.92% gc time)
-
-# # # # in wrapper Distribution prior, insiade FLOAT64 DATASET
-# # # 325.158167 seconds (205.97 M allocations: 387.958 GiB, 15.07% gc time) 
-# # #   429.536 s (206476324 allocations: 388.43 GiB)
-# # #   527.364 s (206740343 allocations: 388.58 GiB)
-
-# # # #   wrapper Distribtuion, inside Float64
-# # # 326.017 s (206037971 allocations: 387.96 GiB)
-# # # 347.424730 seconds (206.45 M allocations: 388.532 GiB, 12.92% gc time)
-
-# # # 439.047568 seconds (284.24 M allocations: 392.598 GiB, 15.25% gc time, 14.36% compilation time: 0% of which was recompilation)
-# # # 375.472142 seconds (206.40 M allocations: 388.529 GiB, 14.93% gc time)
-# # # 374.888820 seconds (206.34 M allocations: 388.346 GiB, 14.09% gc time)
-# # # 363.719611 seconds (206.39 M allocations: 388.581 GiB, 15.08% gc time)
-# # # # inside Distribtion, instide Float64
-# # #   310.238 s (206324249 allocations: 388.53 GiB)
-# # #   308.991494 seconds (206.34 M allocations: 388.549 GiB, 14.01% gc time)
-# # #   337.442 s (206280712 allocations: 388.36 GiB)
-# # #   299.983096 seconds (206.29 M allocations: 388.512 GiB, 17.14% gc time)
-
-# # #   394.924357 seconds (206.27 M allocations: 388.337 GiB, 23.68% gc time)
-# # # 438.204179 seconds (206.39 M allocations: 388.470 GiB, 23.84% gc time)
-# # #   376.626914 seconds (206.46 M allocations: 388.693 GiB, 18.72% gc time)
-# # # 286.863795 seconds (206.14 M allocations: 388.370 GiB, 18.80% gc time)
-# # #   285.556929 seconds (206.22 M allocations: 388.371 GiB, 17.04% gc time)
-# # #   291.471662 seconds (205.96 M allocations: 388.068 GiB, 19.85% gc time)
-
-# # # 495.814341 seconds (284.62 M allocations: 392.622 GiB, 12.56% gc time, 10.96% compilation time: 0% of which was recompilation)
-# # # 361.530617 seconds (206.36 M allocations: 388.526 GiB, 14.98% gc time)
-# # # 348.576065 seconds (206.22 M allocations: 388.337 GiB, 15.01% gc time)
-# # # 374.575609 seconds (206.45 M allocations: 388.586 GiB, 14.65% gc time)
-# # # 314.223008 seconds (206.23 M allocations: 388.411 GiB, 14.63% gc time)
-
-# # PROBLEM-3 LOTKA VOLTERRA EXAMPLE [WIP] (WITH PARAMETER ESTIMATION)(will be put in tutorial page)
-# function lotka_volterra(u, p, t)
-#     # Model parameters.
-#     α, β, γ, δ = p
-#     # Current state.
-#     x, y = u
-
-#     # Evaluate differential equations.
-#     dx = (α - β * y) * x # prey
-#     dy = (δ * x - γ) * y # predator
-
-#     return [dx, dy]
-# end
-
-# u0 = [1.0, 1.0]
-# p = [1.5, 1.0, 3.0, 1.0]
-# tspan = (0.0, 6.0)
-# prob = ODEProblem(lotka_volterra, u0, tspan, p)
-# solution = solve(prob, Tsit5(); saveat = 0.05)
-
-# as = reduce(hcat, solution.u)
-# as[1, :]
-# # Plot simulation.
-# time = solution.t
-# u = hcat(solution.u...)
-# # BPINN AND TRAINING DATASET CREATION, NN create, Reconstruct
-# x = u[1, :] + 0.5 * randn(length(u[1, :]))
-# y = u[2, :] + 0.5 * randn(length(u[1, :]))
-# dataset = [x[1:50], y[1:50], time[1:50]]
-# # scatter!(time, [x, y])
-# # scatter!(dataset[3], [dataset[2], dataset[1]])
-
-# # NN has 2 outputs as u -> [dx,dy]
-# chainlux1 = Lux.Chain(Lux.Dense(1, 6, Lux.tanh), Lux.Dense(6, 6, Lux.tanh),
-#     Lux.Dense(6, 2))
-# chainflux1 = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 6, tanh), Flux.Dense(6, 2))
-
-# # fh_mcmc_chainflux1, fhsamplesflux1, fhstatsflux1 = ahmc_bayesian_pinn_ode(prob, chainflux1,
-# #                                                                           dataset = dataset,
-# #                                                                           draw_samples = 1000,
-# #                                                                           l2std = [
-# #                                                                               0.05,
-# #                                                                               0.05,
-# #                                                                           ],
-# #                                                                           phystd = [
-# #                                                                               0.05,
-# #                                                                               0.05,
-# #                                                                           ],
-# #                                                                           priorsNNw = (0.0,
-# #          
-
-# #   3.0))
-
-# # check if NN output is more than 1
-# # numoutput = size(luxar[1])[1]
-# # if numoutput > 1
-# #     # Initialize a vector to store the separated outputs for each output dimension
-# #     output_matrices = [Vector{Vector{Float32}}() for _ in 1:numoutput]
-
-# #     # Loop through each element in the `as` vector
-# #     for element in as
-# #         for i in 1:numoutput
-# #             push!(output_matrices[i], element[i, :])  # Append the i-th output (i-th row) to the i-th output_matrices
-# #         end
-# #     end
-
-# #     ensemblecurves = Vector{}[]
-# #     for r in 1:numoutput
-# #         br = hcat(output_matrices[r]...)'
-# #         ensemblecurve = prob.u0[r] .+
-# #                         [Particles(br[:, i]) for i in 1:length(t)] .*
-# #                         (t .- prob.tspan[1])
-# #         push!(ensemblecurves, ensemblecurve)
-# #     end
-
-# # else
-# #     # ensemblecurve = prob.u0 .+
-# #     #                 [Particles(reduce(vcat, luxar)[:, i]) for i in 1:length(t)] .*
-# #     #                 (t .- prob.tspan[1])
-# #     print("yuh")
-# # end
-
-# # fhsamplesflux2
-# # nnparams = length(init1)
-# # estimnnparams = [Particles(reduce(hcat, fhsamplesflux2)[i, :]) for i in 1:nnparams]
-# # ninv=4
-# # estimated_params = [Particles(reduce(hcat, fhsamplesflux2[(end - ninv + 1):end])[i, :])
-# #                     for i in (nnparams + 1):(nnparams + ninv)]
-# # output_matrices[r]
-# # br = hcat(output_matrices[r]...)'
-
-# # br[:, 1]
-
-# # [Particles(br[:, i]) for i in 1:length(t)]
-# # prob.u0
-# # [Particles(br[:, i]) for i in 1:length(t)] .*
-# # (t .- prob.tspan[1])
-
-# # ensemblecurve = prob.u0[r] .+
-# #                 [Particles(br[:, i]) for i in 1:length(t)] .*
-# #                 (t .- prob.tspan[1])
-# # push!(ensemblecurves, ensemblecurve)
-
-# using StatsPlots
-# plotly()
-# plot(t, ensemblecurve)
-# plot(t, ensemblecurves[1])
-# plot!(t, ensemblecurves[2])
-# ensemblecurve
-# ensemblecurves[1]
-# fh_mcmc_chainflux2, fhsamplesflux2, fhstatsflux2 = ahmc_bayesian_pinn_ode(prob, chainflux1,
-#     dataset = dataset,
-#     draw_samples = 1000,
-#     l2std = [
-#         0.05,
-#         0.05,
-#     ],
-#     phystd = [
-#         0.05,
-#         0.05,
-#     ],
-#     priorsNNw = (0.0,
-#         3.0),
-#     param = [
-#         Normal(1.5,
-#             0.5),
-#         Normal(1.2,
-#             0.5),
-#         Normal(3.3,
-#             0.5),
-#         Normal(1.4,
-#             0.5),
-#     ], progress = true)
-
-# alg = NeuralPDE.BNNODE(chainflux1,
-#     dataset = dataset,
-#     draw_samples = 1000,
-#     l2std = [
-#         0.05,
-#         0.05,
-#     ],
-#     phystd = [
-#         0.05,
-#         0.05,
-#     ],
-#     priorsNNw = (0.0,
-#         3.0),
-#     param = [
-#         Normal(4.5,
-#             5),
-#         Normal(7,
-#             2),
-#         Normal(5,
-#             2),
-#         Normal(-4,
-#             6),
-#     ],
-#     n_leapfrog = 30, progress = true)
-
-# sol3flux_pestim = solve(prob, alg)
-
-# # OG PARAM VALUES
-# [1.5, 1.0, 3.0, 1.0]
-# # less
-# # [1.34, 7.51, 2.54, -2.55]
-# # better
-# # [1.48, 0.993, 2.77, 0.954]
-
-# sol3flux_pestim.es
-# sol3flux_pestim.estimated_ode_params
-# # fh_mcmc_chainlux1, fhsampleslux1, fhstatslux1 = ahmc_bayesian_pinn_ode(prob, chainlux1,
-# #                                                                        dataset = dataset,
-# #                                                                        draw_samples = 1000,
-# #                                                                        l2std = [0.05, 0.05],
-# #                                                                        phystd = [
-# #                                                                            0.05,
-# #                                                                            0.05,
-# #                                                                        ],
-# #                                                                        priorsNNw = (0.0,
-# #                                                                                     3.0))
-
-# # fh_mcmc_chainlux2, fhsampleslux2, fhstatslux2 = ahmc_bayesian_pinn_ode(prob, chainlux1,
-# #                                                                        dataset = dataset,
-# #                                                                        draw_samples = 1000,
-# #                                                                        l2std = [0.05, 0.05],
-# #                                                                        phystd = [
-# #                                                                            0.05,
-# #                                                                            0.05,
-# #                                                                        ],
-# #                                                                        priorsNNw = (0.0,
-# #                                                                                     3.0),
-# #                                                                        param = [
-# #                                                                            Normal(1.5, 0.5),
-# #                                                                            Normal(1.2, 0.5),
-# #                                                                            Normal(3.3, 0.5),
-# #                                                                            Normal(1.4, 0.5),
-# #                                                                        ])
-
-# init1, re1 = destructure(chainflux1)
-# θinit, st = Lux.setup(Random.default_rng(), chainlux1)
-# #   PLOT testing points
-# t = time
-# p = prob.p
-# collect(Float64, vcat(ComponentArrays.ComponentArray(θinit)))
-# collect(Float64, ComponentArrays.ComponentArray(θinit))
-# # Mean of last 1000 sampled parameter's curves(flux and lux chains)[Ensemble predictions]
-# out = re1.([fhsamplesflux1[i][1:68] for i in 500:1000])
-# yu = [out[i](t') for i in eachindex(out)]
-
-# function getensemble(yu, num_models)
-#     num_rows, num_cols = size(yu[1])
-#     row_means = zeros(Float32, num_rows, num_cols)
-#     for i in 1:num_models
-#         row_means .+= yu[i]
-#     end
-#     row_means ./ num_models
-# end
-# fluxmean = getensemble(yu, length(out))
-# meanscurve1_1 = prob.u0 .+ (t' .- prob.tspan[1]) .* fluxmean
-# mean(abs.(u .- meanscurve1_1))
-
-# plot!(t, physsol1)
-# @test mean(abs2.(x̂ .- meanscurve1_1)) < 2e-2
-# @test mean(abs2.(physsol1 .- meanscurve1)) < 2e-2
-# @test mean(abs2.(x̂ .- meanscurve2)) < 3e-3
-# @test mean(abs2.(physsol1 .- meanscurve2)) < 2e-3
-
-# out = re1.([fhsamplesflux2[i][1:68] for i in 500:1000])
-# yu = collect(out[i](t') for i in eachindex(out))
-# fluxmean = getensemble(yu, length(out))
-# meanscurve1_2 = prob.u0 .+ (t' .- prob.tspan[1]) .* fluxmean
-# mean(abs.(u .- meanscurve1_2))
-
-# @test mean(abs2.(x̂ .- meanscurve1)) < 2e-2
-# @test mean(abs2.(physsol1 .- meanscurve1)) < 2e-2
-# @test mean(abs2.(x̂ .- meanscurve2)) < 3e-3
-# @test mean(abs2.(physsol1 .- meanscurve2)) < 2e-3
-
-# θ = [vector_to_parameters(fhsampleslux1[i][1:(end - 4)], θinit) for i in 500:1000]
-# luxar = [chainlux1(t', θ[i], st)[1] for i in 1:500]
-# luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
-# meanscurve2_1 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
-
-# @test mean(abs2.(x̂ .- meanscurve1)) < 2e-2
-# @test mean(abs2.(physsol1 .- meanscurve1)) < 2e-2
-# @test mean(abs2.(x̂ .- meanscurve2)) < 3e-3
-# @test mean(abs2.(physsol1 .- meanscurve2)) < 2e-3
-
-# θ = [vector_to_parameters(fhsampleslux2[i][1:(end - 4)], θinit) for i in 500:1000]
-# luxar = [chainlux1(t', θ[i], st)[1] for i in 1:500]
-# luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
-# meanscurve2_2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
-
-# @test mean(abs2.(x̂ .- meanscurve1)) < 2e-2
-# @test mean(abs2.(physsol1 .- meanscurve1)) < 2e-2
-# @test mean(abs2.(x̂ .- meanscurve2)) < 3e-3
-# @test mean(abs2.(physsol1 .- meanscurve2)) < 2e-3
-
-# # # ESTIMATED ODE PARAMETERS (NN1 AND NN2)
-# @test abs(p - mean([fhsamplesflux2[i][69] for i in 500:1000])) < 0.1 * p[1]
-# @test abs(p - mean([fhsampleslux2[i][69] for i in 500:1000])) < 0.2 * p[1]
-
-# # @test abs(p - mean([fhsamplesflux2[i][70] for i in 500:1000])) < 0.1 * p[2]
-# # @test abs(p - mean([fhsampleslux2[i][70] for i in 500:1000])) < 0.2 * p[2]
-
-# # @test abs(p - mean([fhsamplesflux2[i][71] for i in 500:1000])) < 0.1 * p[3]
-# # @test abs(p - mean([fhsampleslux2[i][71] for i in 500:1000])) < 0.2 * p[3]
-
-# # @test abs(p - mean([fhsamplesflux2[i][72] for i in 500:1000])) < 0.1 * p[4]
-# # @test abs(p - mean([fhsampleslux2[i][72] for i in 500:1000])) < 0.2 * p[4]
-
-# # fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chainlux1,
-# #                                                               dataset = dataset,
-# #                                                               draw_samples = 1000,
-# #                                                               l2std = [0.05, 0.05],
-# #                                                               phystd = [0.05, 0.05],
-# #                                                               priorsNNw = (0.0, 3.0),
-# #                                                               param = [
-# #                                                                   Normal(1.5, 0.5),
-# #                                                                   Normal(1.2, 0.5),
-# #                                                                   Normal(3.3, 0.5),
-# #                                                                   Normal(1.4, 0.5),
-# #                                                               ], autodiff = true)
-
-# # fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chainlux1,
-# #                                                               dataset = dataset,
-# #                                                               draw_samples = 1000,
-# #                                                               l2std = [0.05, 0.05],
-# #                                                               phystd = [0.05, 0.05],
-# #                                                               priorsNNw = (0.0, 3.0),
-# #                                                               param = [
-# #                                                                   Normal(1.5, 0.5),
-# #                                                                   Normal(1.2, 0.5),
-# #                                                                   Normal(3.3, 0.5),
-# #                                                                   Normal(1.4, 0.5),
-# #                                                               ], nchains = 2)
-
-# # NOTES (WILL CLEAR LATER)
-# # --------------------------------------------------------------------------------------------
-# # Hamiltonian energy must be lowest(more paramters the better is it to map onto them)
-# # full better than L2 and phy individual(test)
-# # in mergephys more points after training points is better from 20->40
-# # does consecutive runs bceome better? why?(plot 172)(same chain maybe)
-# # does density of points in timespan matter dataset vs internal timespan?(plot 172)(100+0.01)
-# # when training from 0-1 and phys from 1-5 with 1/150 simple nn slow,but bigger nn faster decrease in Hmailtonian
-# # bigger time interval more curves to adapt to only more parameters adapt to that, better NN architecture
-# # higher order logproblems solve better
-# # repl up up are same instances? but reexecute calls are new?
-
-# #Compare results against paper example
-# # Lux chains support (DONE)
-# # fix predictions for odes depending upon 1,p in f(u,p,t)(DONE)
-# # lotka volterra learn curve beyond l2 losses(L2 losses determine accuracy of parameters)(parameters cant run free ∴ L2 interval only)
-# # check if prameters estimation works(YES)
-# # lotka volterra parameters estimate (DONE)
-
-# using NeuralPDE, Lux, Flux, Optimization, OptimizationOptimJL
-# import ModelingToolkit: Interval
-# using Plots, StatsPlots
-# plotly()
-# # Profile.init()
-
-# @parameters x y
-# @variables u(..)
-# Dxx = Differential(x)^2
-# Dyy = Differential(y)^2
-
-# # 2D PDE
-# eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y)
-
-# # Boundary conditions
-# bcs = [u(0, y) ~ 0.0, u(1, y) ~ 0.0,
-#     u(x, 0) ~ 0.0, u(x, 1) ~ 0.0]
-# # Space and time domains
-# domains = [x ∈ Interval(0.0, 1.0),
-#     y ∈ Interval(0.0, 1.0)]
-
-# # Neural network
-# dim = 2 # number of dimensions
-# chain = Flux.Chain(Flux.Dense(dim, 16, Lux.σ), Flux.Dense(16, 16, Lux.σ), Flux.Dense(16, 1))
-# θ, re = destructure(chain)
-# # Discretization
-# dx = 0.05
-# discretization = PhysicsInformedNN(chain, GridTraining(dx))
-
-# @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
-
-# pinnrep = symbolic_discretize(pde_system, discretization)
-# typeof(pinnrep.phi)
-# typeof(pinnrep.phi)
-# typeof(re)
-# pinnrep.phi([1, 2], θ)
-
-# typeof(θ)
-
-# print(pinnrep)
-# pinnrep.eqs
-# pinnrep.bcs
-# pinnrep.domains
-# pinnrep.eq_params
-# pinnrep.defaults
-# print(pinnrep.default_p)
-# pinnrep.param_estim
-# print(pinnrep.additional_loss)
-# pinnrep.adaloss
-# pinnrep.depvars
-# pinnrep.indvars
-# pinnrep.dict_depvar_input
-# pinnrep.dict_depvars
-# pinnrep.dict_indvars
-# print(pinnrep.logger)
-# pinnrep.multioutput
-# pinnrep.iteration
-# pinnrep.init_params
-# pinnrep.flat_init_params
-# pinnrep.phi
-# pinnrep.derivative
-# pinnrep.strategy
-# pinnrep.pde_indvars
-# pinnrep.bc_indvars
-# pinnrep.pde_integration_vars
-# pinnrep.bc_integration_vars
-# pinnrep.integral
-# pinnrep.symbolic_pde_loss_functions
-# pinnrep.symbolic_bc_loss_functions
-# pinnrep.loss_functions
-
-# #  = discretize(pde_system, discretization)
-# prob = symbolic_discretize(pde_system, discretization)
-# # "The boundary condition loss functions"
-# sum([prob.loss_functions.bc_loss_functions[i](θ) for i in eachindex(1:4)])
-# sum([prob.loss_functions.pde_loss_functions[i](θ) for i in eachindex(1)])
-
-# prob.loss_functions.full_loss_function(θ, 32)
-
-# prob.loss_functions.bc_loss_functions[1](θ)
-
-# prob.loss_functions.bc_loss_functions
-# prob.loss_functions.full_loss_function
-# prob.loss_functions.additional_loss_function
-# prob.loss_functions.pde_loss_functions
-
-# 1.3953060473003345 + 1.378102161087438 + 1.395376727128639 + 1.3783868705075002 +
-# 0.22674532775196876
-# # "The PDE loss functions"
-# prob.loss_functions.pde_loss_functions
-# prob.loss_functions.pde_loss_functions[1](θ)
-# # "The full loss function, combining the PDE and boundary condition loss functions.This is the loss function that is used by the optimizer."
-# prob.loss_functions.full_loss_function(θ, nothing)
-# prob.loss_functions.full_loss_function(θ, 423423)
-
-# # "The wrapped `additional_loss`, as pieced together for the optimizer."
-# prob.loss_functions.additional_loss_function
-# # "The pre-data version of the PDE loss function"
-# prob.loss_functions.datafree_pde_loss_functions
-# # "The pre-data version of the BC loss function"
-# prob.loss_functions.datafree_bc_loss_functions
-
-# using Random
-# θ, st = Lux.setup(Random.default_rng(), chain)
-# #Optimizer
-# opt = OptimizationOptimJL.BFGS()
-
-# #Callback function
-# callback = function (p, l)
-#     println("Current loss is: $l")
-#     return false
-# end
-
-# res = Optimization.solve(prob, opt, callback = callback, maxiters = 1000)
-# phi = discretization.phi
-
-# # ------------------------------------------------
-# using NeuralPDE, Lux, ModelingToolkit, Optimization, OptimizationOptimJL, OrdinaryDiffEq,
-#       Plots
-# import ModelingToolkit: Interval, infimum, supremum
-# @parameters t, σ_, β, ρ
-# @variables x(..), y(..), z(..)
-# Dt = Differential(t)
-# eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
-#     Dt(y(t)) ~ x(t) * (ρ - z(t)) - y(t),
-#     Dt(z(t)) ~ x(t) * y(t) - β * z(t)]
-
-# bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
-# domains = [t ∈ Interval(0.0, 1.0)]
-# dt = 0.01
-
-# input_ = length(domains)
-# n = 8
-# chain1 = Lux.Chain(Lux.Dense(input_, n, Lux.σ), Lux.Dense(n, n, Lux.σ),
-#                    Lux.Dense(n, n, Lux.σ),
-#                    Lux.Dense(n, 1))
-# chain2 = Lux.Chain(Lux.Dense(input_, n, Lux.σ), Lux.Dense(n, n, Lux.σ),
-#                    Lux.Dense(n, n, Lux.σ),
-#                    Lux.Dense(n, 1))
-# chain3 = Lux.Chain(Lux.Dense(input_, n, Lux.σ), Lux.Dense(n, n, Lux.σ),
-#                    Lux.Dense(n, n, Lux.σ),
-#                    Lux.Dense(n, 1))
-
-# function lorenz!(du, u, p, t)
-#     du[1] = 10.0 * (u[2] - u[1])
-#     du[2] = u[1] * (28.0 - u[3]) - u[2]
-#     du[3] = u[1] * u[2] - (8 / 3) * u[3]
-# end
-
-# u0 = [1.0; 0.0; 0.0]
-# tspan = (0.0, 1.0)
-# prob = ODEProblem(lorenz!, u0, tspan)
-# sol = solve(prob, Tsit5(), dt = 0.1)
-# ts = [infimum(d.domain):dt:supremum(d.domain) for d in domains][1]
-# function getData(sol)
-#     data = []
-#     us = hcat(sol(ts).u...)
-#     ts_ = hcat(sol(ts).t...)
-#     return [us, ts_]
-# end
-# data = getData(sol)
-
-# (u_, t_) = data
-# len = length(data[2])
-
-# depvars = [:x, :y, :z]
-# function additional_loss(phi, θ, p)
-#     return sum(sum(abs2, phi[i](t_, θ[depvars[i]]) .- u_[[i], :]) / len for i in 1:1:3)
-# end
-
-# discretization = NeuralPDE.PhysicsInformedNN([chain1, chain2, chain3],
-#                                              NeuralPDE.GridTraining(dt),
-#                                              param_estim = false,
-#                                              additional_loss = additional_loss)
-# @named pde_system = PDESystem(eqs, bcs, domains, [t], [x(t), y(t), z(t)], [σ_, ρ, β],
-#                               defaults = Dict([p .=> 1.0 for p in [σ_, ρ, β]]))
-# prob = NeuralPDE.discretize(pde_system, discretization)
-# callback = function (p, l)
-#     println("Current loss is: $l")
-#     return false
-# end
-# res = Optimization.solve(prob, BFGS(); callback = callback, maxiters = 5000)
-# p_ = res.u[(end - 2):end] # p_ = [9.93, 28.002, 2.667]
-
-# minimizers = [res.u.depvar[depvars[i]] for i in 1:3]
-# ts = [infimum(d.domain):(dt / 10):supremum(d.domain) for d in domains][1]
-# u_predict = [[discretization.phi[i]([t], minimizers[i])[1] for t in ts] for i in 1:3]
-# plot(sol)
-# plot!(ts, u_predict, label = ["x(t)" "y(t)" "z(t)"])
-
-# discretization.multioutput
-# discretization.chain
-# discretization.strategy
-# discretization.init_params
-# discretization.phi
-# discretization.derivative
-# discretization.param_estim
-# discretization.additional_loss
-# discretization.adaptive_loss
-# discretization.logger
-# discretization.log_options
-# discretization.iteration
-# discretization.self_increment
-# discretization.multioutput
-# discretization.kwargs
-
-# struct BNNODE1{P <: Vector{<:Distribution}}
-#     chain::Any
-#     Kernel::Any
-#     draw_samples::UInt32
-#     priorsNNw::Tuple{Float64, Float64}
-#     param::P
-#     l2std::Vector{Float64}
-#     phystd::Vector{Float64}
-
-#     function BNNODE1(chain, Kernel; draw_samples = 2000, priorsNNw = (0.0, 3.0), param = [],
-#                      l2std = [0.05], phystd = [0.05])
-#         BNNODE1(chain, Kernel, draw_samples, priorsNNw, param, l2std, phystd)
-#     end
-# end
-
-# struct BNNODE3{C, K, P <: Union{Any, Vector{<:Distribution}}}
-#     chain::C
-#     Kernel::K
-#     draw_samples::UInt32
-#     priorsNNw::Tuple{Float64, Float64}
-#     param::P
-#     l2std::Vector{Float64}
-#     phystd::Vector{Float64}
-
-#     function BNNODE3(chain, Kernel; draw_samples = 2000, priorsNNw = (0.0, 3.0), param = [],
-#                      l2std = [0.05], phystd = [0.05])
-#         new{typeof(chain), typeof(Kernel), typeof(param)}(chain, Kernel, draw_samples,
-#                                                           priorsNNw, param, l2std, phystd)
-#     end
-# end
-# linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π)
-# linear = (u, p, t) -> cos(2 * π * t)
-# tspan = (0.0, 2.0)
-# u0 = 0.0
-# prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan)
-
-# ta = range(tspan[1], tspan[2], length = 300)
-# u = [linear_analytic(u0, nothing, ti) for ti in ta]
-# sol1 = solve(prob, Tsit5())
-
-# # BPINN AND TRAINING DATASET CREATION, NN create, Reconstruct
-# x̂ = collect(Float64, Array(u) + 0.02 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂[1:100], time[1:100]]
-
-# # Call BPINN, create chain
-# chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1))
-# chainlux = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1))
-# HMC
-# solve(prob, BNNODE(chainflux, HMC))
-# BNNODE1(chainflux, HMC, 2000)
-
-# draw_samples = 2000
-# priorsNNw = (0.0, 3.0)
-# param = []
-# l2std = [0.05]
-# phystd = [0.05]
-# @time BNNODE3(chainflux, HMC, draw_samples = 2000, priorsNNw = (0.0, 3.0),
-#               param = [nothing],
-#               l2std = [0.05], phystd = [0.05])
-# typeof(Nothing) <: Vector{<:Distribution}
-# Nothing <: Distribution
-# {UnionAll} <: Distribution
-# @time [Nothing]
-# typeof([Nothing])
-# @time [1]
-
-# function test1(sum; c = 23, d = 32)
-#     return sum + c + d
-# end
-# function test(a, b; c, d)
-#     return test1(a + b, c, d)
-# end
-
-# test(2, 2)
-
-# struct BNNODE3{C, K, P <: Union{Vector{Nothing}, Vector{<:Distribution}}}
-#     chain::C
-#     Kernel::K
-#     draw_samples::Int64
-#     priorsNNw::Tuple{Float64, Float64}
-#     param::P
-#     l2std::Vector{Float64}
-#     phystd::Vector{Float64}
-
-#     function BNNODE3(chain, Kernel; draw_samples,
-#                      priorsNNw, param = [nothing], l2std, phystd)
-#         new{typeof(chain), typeof(Kernel), typeof(param)}(chain,
-#                                                           Kernel,
-#                                                           draw_samples,
-#                                                           priorsNNw,
-#                                                           param, l2std,
-#                                                           phystd)
-#     end
-# end
-
-# function solve1(prob::DiffEqBase.AbstractODEProblem, alg::BNNODE3;
-#                 dataset = [nothing], dt = 1 / 20.0,
-#                 init_params = nothing, nchains = 1,
-#                 autodiff = false, Integrator = Leapfrog,
-#                 Adaptor = StanHMCAdaptor, targetacceptancerate = 0.8,
-#                 Metric = DiagEuclideanMetric, jitter_rate = 3.0,
-#                 tempering_rate = 3.0, max_depth = 10, Δ_max = 1000,
-#                 n_leapfrog = 10, δ = 0.65, λ = 0.3, progress = true,
-#                 verbose = false)
-#     chain = alg.chain
-#     l2std = alg.l2std
-#     phystd = alg.phystd
-#     priorsNNw = alg.priorsNNw
-#     Kernel = alg.Kernel
-#     draw_samples = alg.draw_samples
-
-#     param = alg.param == [nothing] ? [] : alg.param
-#     mcmcchain, samples, statistics = ahmc_bayesian_pinn_ode(prob, chain, dataset = dataset,
-#                                                             draw_samples = draw_samples,
-#                                                             init_params = init_params,
-#                                                             physdt = dt, l2std = l2std,
-#                                                             phystd = phystd,
-#                                                             priorsNNw = priorsNNw,
-#                                                             param = param,
-#                                                             nchains = nchains,
-#                                                             autodiff = autodiff,
-#                                                             Kernel = Kernel,
-#                                                             Integrator = Integrator,
-#                                                             Adaptor = Adaptor,
-#                                                             targetacceptancerate = targetacceptancerate,
-#                                                             Metric = Metric,
-#                                                             jitter_rate = jitter_rate,
-#                                                             tempering_rate = tempering_rate,
-#                                                             max_depth = max_depth,
-#                                                             Δ_max = Δ_max,
-#                                                             n_leapfrog = n_leapfrog, δ = δ,
-#                                                             λ = λ, progress = progress,
-#                                                             verbose = verbose)
-# end
-
-# linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π)
-# linear = (u, p, t) -> cos(2 * π * t)
-# tspan = (0.0, 2.0)
-# u0 = 0.0
-# prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan)
-
-# ta = range(tspan[1], tspan[2], length = 300)
-# u = [linear_analytic(u0, nothing, ti) for ti in ta]
-# # sol1 = solve(prob, Tsit5())
-
-# # BPINN AND TRAINING DATASET CREATION, NN create, Reconstruct
-# x̂ = collect(Float64, Array(u) + 0.02 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂[1:100], time[1:100]]
-
-# # Call BPINN, create chain
-# chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1))
-# chainlux = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1))
-# HMC
-
-# solve1(prob, a)
-# a = BNNODE3(chainflux, HMC, draw_samples = 2000,
-#             priorsNNw = (0.0, 3.0),
-#             l2std = [0.05], phystd = [0.05])
-
-# Define Lotka-Volterra model.
-function lotka_volterra1(u, p, t)
-    # Model parameters.
-    α, β, γ, δ = p
-    # Current state.
-    x, y = u
-
-    # Evaluate differential equations.
-    dx = (α - β * y) * x # prey
-    dy = (δ * x - γ) * y # predator
-
-    return [dx, dy]
-end
-
-u0 = [1.0, 1.0]
-p = [1.5, 1.0, 3.0, 1.0]
-tspan = (0.0, 6.0)
-prob = ODEProblem(lotka_volterra1, u0, tspan, p)
-solution = solve(prob, Tsit5(); saveat = 0.05)
-
-as = reduce(hcat, solution.u)
-as[1, :]
-# Plot simulation.
-time = solution.t
-u = hcat(solution.u...)
-# BPINN AND TRAINING DATASET CREATION, NN create, Reconstruct
-x = u[1, :] + 0.5 * randn(length(u[1, :]))
-y = u[2, :] + 0.5 * randn(length(u[1, :]))
-dataset = [x[1:50], y[1:50], time[1:50]]
-# scatter!(time, [x, y])
-# scatter!(dataset[3], [dataset[2], dataset[1]])
-
-# NN has 2 outputs as u -> [dx,dy]
-chainlux1 = Lux.Chain(Lux.Dense(1, 6, Lux.tanh), Lux.Dense(6, 6, Lux.tanh),
-    Lux.Dense(6, 2))
-chainflux1 = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 6, tanh), Flux.Dense(6, 2))
-
-fh_mcmc_chainflux1, fhsamplesflux1, fhstatsflux1 = ahmc_bayesian_pinn_ode(prob, chainflux1,
-    dataset = dataset,
-    draw_samples = 1000,
-    l2std = [
-        0.05,
-        0.05,
-    ],
-    phystd = [
-        0.05,
-        0.05,
-    ],
-    priorsNNw = (0.0, 3.0), progress = true)
-ahmc_bayesian_pinn_ode(prob, chainflux1,
-    dataset = dataset,
-    draw_samples = 1000,
-    l2std = [
-        0.05,
-        0.05,
-    ],
-    phystd = [
-        0.05,
-        0.05,
-    ],
-    priorsNNw = (0.0, 3.0), progress = true)
-
-#     2×171 Matrix{Float64}:
-#  -0.5  -0.518956  -0.529639  …  -1.00266  -1.01049
-#   2.0   1.97109    1.92747       0.42619   0.396335
-
-#     2-element Vector{Float64}:
-#  -119451.94949911036
-#  -128543.23714618056
-
-# alg = NeuralPDE.BNNODE(chainflux1,
-#     dataset = dataset,
-#     draw_samples = 1000,
-#     l2std = [
-#         0.05,
-#         0.05,
-#     ],
-#     phystd = [
-#         0.05,
-#         0.05,
-#     ],
-#     priorsNNw = (0.0,
-#         3.0),
-#     param = [
-#         Normal(4.5,
-#             5),
-#         Normal(7,
-#             2),
-#         Normal(5,
-#             2),
-#         Normal(-4,
-#             6),
-#     ],
-#     n_leapfrog = 30, progress = true)
-
-# sol3flux_pestim = solve(prob, alg)
-
-#  ----------------------------------------------
-# original paper implementation
-# 25 points 
-run1  #7.70593 Particles{Float64, 1}
-run2 #6.66347 Particles{Float64, 1} 
-run3 #6.84827 Particles{Float64, 1} 
-
-# 50 points 
-run1 #7.83577 Particles{Float64, 1}
-run2 #6.49477 Particles{Float64, 1}
-run3 #6.47421 Particles{Float64, 1}
-
-# 100 points 
-run1 #5.96604 Particles{Float64, 1}
-run2 #6.05432 Particles{Float64, 1}
-run3 #6.08856 Particles{Float64, 1}
-
-# Full likelihood(uses total variation regularized differentiation) 
-# 25 points 
-run1 #6.41722 Particles{Float64, 1}
-run2 #6.42782 Particles{Float64, 1}
-run3 #6.42782 Particles{Float64, 1}
-
-# 50 points
-run1 #5.71268 Particles{Float64, 1}
-run2 #5.74599 Particles{Float64, 1}
-run3 #5.74599 Particles{Float64, 1}
-
-# 100 points  
-run1 #6.59097 Particles{Float64, 1}
-run2 #6.62813 Particles{Float64, 1}
-run3 #6.62813 Particles{Float64, 1}
-
-using Plots, StatsPlots
-function lotka_volterra(u, p, t)
-    # Model parameters.
-    α, β, γ, δ = p
-    # Current state.
-    x, y = u
-
-    # Evaluate differential equations.
-    dx = (α - β * y) * x # prey
-    dy = (δ * x - γ) * y # predator
-
-    return [dx, dy]
-end
-
-# initial-value problem.
-u0 = [1.0, 1.0]
-p = [1.5, 1.0, 3.0, 1.0]
-tspan = (0.0, 6.0)
-prob = ODEProblem(lotka_volterra, u0, tspan, p)
-
-# Plot simulation.
-
-solution = solve(prob, Tsit5(); saveat = 0.05)
-plot(solve(prob, Tsit5()))
-
-# Dataset creation for parameter estimation
-time = solution.t
-u = hcat(solution.u...)
-x = u[1, :] + 0.5 * randn(length(u[1, :]))
-y = u[2, :] + 0.5 * randn(length(u[1, :]))
-dataset = [x, y, time]
-
-# Neural Networks must have 2 outputs as u -> [dx,dy] in function lotka_volterra()
-chainflux = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 6, tanh), Flux.Dense(6, 2)) |>
-            Flux.f64
-
-chainlux = Lux.Chain(Lux.Dense(1, 6, Lux.tanh), Lux.Dense(6, 6, Lux.tanh), Lux.Dense(6, 2))
-
-alg1 = NeuralPDE.BNNODE(chainflux,
-    dataset = dataset,
-    draw_samples = 1000,
-    l2std = [
-        0.01,
-        0.01,
-    ],
-    phystd = [
-        0.01,
-        0.01,
-    ],
-    priorsNNw = (0.0,
-        3.0),
-    param = [
-        LogNormal(1.5,
-            0.5),
-        LogNormal(1.2,
-            0.5),
-        LogNormal(3.3,
-            1),
-        LogNormal(1.4,
-            1)],
-    n_leapfrog = 30, progress = true)
-
-sol_flux_pestim = solve(prob, alg1)
-
-# Dataset not needed as we are solving the equation with ideal parameters
-alg2 = NeuralPDE.BNNODE(chainlux,
-    draw_samples = 1000,
-    l2std = [
-        0.05,
-        0.05,
-    ],
-    phystd = [
-        0.05,
-        0.05,
-    ],
-    priorsNNw = (0.0,
-        3.0),
-    n_leapfrog = 30, progress = true)
-
-sol_lux = solve(prob, alg2)
-
-#testing timepoints must match keyword arg `saveat`` timepoints of solve() call
-t = collect(Float64, prob.tspan[1]:(1 / 50.0):prob.tspan[2])
-
-# plotting solution for x,y for chain_flux
-plot(t, sol_flux_pestim.ensemblesol[1])
-plot!(t, sol_flux_pestim.ensemblesol[2])
-
-plot(sol_flux_pestim.ens1mblesol[1])
-plot!(sol_flux_pestim.ensemblesol[2])
-
-# estimated ODE parameters by .estimated_ode_params, weights and biases by .estimated_nn_params
-sol_flux_pestim.estimated_nn_params
-sol_flux_pestim.estimated_ode_params
-
-# plotting solution for x,y for chain_lux
-plot(t, sol_lux.ensemblesol[1])
-plot!(t, sol_lux.ensemblesol[2])
-
-# estimated weights and biases by .estimated_nn_params for chain_lux
-sol_lux.estimated_nn_params
-
-# # ----------------------------------stats-----------------------------
-# #   ----------------------------
-# # -----------------------------
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2loss2 Logpdf is : -3118.0639515039957
-# Sampling 100%|███████████████████████████████| Time: 0:04:47 
-
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2loss2 Logpdf is : -3118.0639515039957
-# Sampling 100%|███████████████████████████████| Time: 0:03:38 
-
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2loss2 Logpdf is : -3118.0639515039957
-# Sampling 100%|███████████████████████████████| Time: 0:04:12 
-# #   --------------------------
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2loss2 Logpdf is : -6242.351071278482
-# Sampling 100%|███████████████████████████████| Time: 0:05:09 
-
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2loss2 Logpdf is : -6242.351071278482
-# Sampling 100%|███████████████████████████████| Time: 0:04:47 
-
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2loss2 Logpdf is : -6242.351071278482
-# Sampling 100%|███████████████████████████████| Time: 0:04:25 
-# #   --------------
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2loss2 Logpdf is : -12497.32657780532
-# Sampling 100%|███████████████████████████████| Time: 0:06:47 
-
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2loss2 Logpdf is : -12497.32657780532
-# Sampling 100%|███████████████████████████████| Time: 0:05:54
-
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2loss2 Logpdf is : -12497.32657780532
-# Sampling 100%|███████████████████████████████| Time: 0:05:46
-# # ------------------------
-# # -----------------------
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -882.2934218498742
-# L2loss2 Logpdf is : -3118.0639515039957
-# Sampling 100%|███████████████████████████████| Time: 0:04:06
-
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -882.2934218498742
-# L2loss2 Logpdf is : -3118.0639515039957
-# Sampling 100%|███████████████████████████████| Time: 0:03:32
-
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -882.2934218498742
-# L2loss2 Logpdf is : -3118.0639515039957
-# Sampling 100%|███████████████████████████████| Time: 0:03:01 
-# # --------------------------
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -1411.1717435511828
-# L2loss2 Logpdf is : -6242.351071278482
-# Sampling 100%|███████████████████████████████| Time: 0:04:02
-
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -1411.1717435511828
-# L2loss2 Logpdf is : -6242.351071278482
-# Sampling 100%|███████████████████████████████| Time: 0:04:08
-
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -1411.1717435511828
-# L2loss2 Logpdf is : -6242.351071278482
-# Sampling 100%|███████████████████████████████| Time: 0:04:15
-# # ----------------------------
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -3240.067149411982
-# L2loss2 Logpdf is : -12497.32657780532
-# Sampling 100%|███████████████████████████████| Time: 0:05:37
-
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -3240.067149411982
-# L2loss2 Logpdf is : -12497.32657780532
-# Sampling 100%|███████████████████████████████| Time: 0:06:02
-
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -3240.067149411982
-# L2loss2 Logpdf is : -12497.32657780532
-# Sampling 100%|███████████████████████████████| Time: 0:06:13
-
-using NeuralPDE, Lux, ModelingToolkit, Optimization, OptimizationOptimJL
-import ModelingToolkit: Interval, infimum, supremum
-
-using NeuralPDE, Flux, OptimizationOptimisers
-
-function diffeq(u, p, t)
-    u1, u2 = u
-    return [u2, p[1] + p[2] * sin(u1) + p[3] * u2]
-end
-p = [5, -10, -1.7]
-u0 = [-1.0, 7.0]
-tspan = (0.0, 10.0)
-prob = ODEProblem(ODEFunction(diffeq), u0, tspan, p)
-
-chainnew = Flux.Chain(Flux.Dense(1, 8, tanh), Flux.Dense(8, 8, tanh), Flux.Dense(8, 2)) |>
-           Flux.f64
-
-opt = OptimizationOptimisers.Adam(0.1)
-opt = Optimisers.ADAGrad(0.1)
-opt = Optimisers.AdaMax(0.01)
-algnew = NeuralPDE.NNODE(chainnew, opt)
-solution_new = solve(prob, algnew, verbose = true,
-    abstol = 1e-10, maxiters = 7000)
-u = reduce(hcat, solution_new.u)
-plot(solution_new.t, u[1, :])
-plot!(solution_new.t, u[2, :])
-
-algnew = NeuralPDE.BNNODE(chainnew, draw_samples = 200,
-    n_leapfrog = 30, progress = true)
-solution_new = solve(prob, algnew)
-
-@parameters t
-@variables u1(..), u2(..)
-D = Differential(t)
-eq = [D(u1(t)) ~ u2(t),
-    D(u2(t)) ~ 5 - 10 * sin(u1(t)) - 1.7 * u2(t)];
-
-import ModelingToolkit: Interval
-bcs = [u1(0) ~ -1, u2(0) ~ 7]
-domains = [t ∈ Interval(0.0, 10.0)]
-dt = 0.01
-
-input_ = length(domains) # number of dimensions
-n = 16
-chain = [Lux.Chain(Lux.Dense(input_, n, Lux.σ), Lux.Dense(n, n, Lux.σ), Lux.Dense(n, 1))
-         for _ in 1:2]
-
-@named pde_system = PDESystem(eq, bcs, domains, [t], [u1(t), u2(t)])
-
-strategy = NeuralPDE.GridTraining(dt)
-discretization = PhysicsInformedNN(chain, strategy)
-sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization)
-
-pde_loss_functions = sym_prob.loss_functions.pde_loss_functions
-bc_loss_functions = sym_prob.loss_functions.bc_loss_functions
-
-callback = function (p, l)
-    println("loss: ", l)
-    # println("pde_losses: ", map(l_ -> l_(p), pde_loss_functions))
-    # println("bcs_losses: ", map(l_ -> l_(p), bc_loss_functions))
-    return false
-end
-
-loss_functions = [pde_loss_functions; bc_loss_functions]
-
-function loss_function(θ, p)
-    sum(map(l -> l(θ), loss_functions))
-end
-
-f_ = OptimizationFunction(loss_function, Optimization.AutoZygote())
-prob = Optimization.OptimizationProblem(f_, sym_prob.flat_init_params)
-
-res = Optimization.solve(prob,
-    OptimizationOptimJL.BFGS();
-    callback = callback,
-    maxiters = 1000)
-phi = discretization.phi
\ No newline at end of file

From a5c3148724bf8d2569303ef912d589c985689e53 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Sun, 21 Jan 2024 00:01:34 +0530
Subject: [PATCH 007/107] update advancedHMC_MCMC.jl

---
 src/advancedHMC_MCMC.jl | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl
index e1ccc47261..6fee4a818e 100644
--- a/src/advancedHMC_MCMC.jl
+++ b/src/advancedHMC_MCMC.jl
@@ -467,17 +467,17 @@ Incase you are only solving the Equations for solution, do not provide dataset
 priors: pdf for W,b + pdf for ODE params
 """
 function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain;
-    strategy = GridTraining, dataset = [nothing],
-    init_params = nothing, draw_samples = 1000,
-    physdt = 1 / 20.0, l2std = [0.05],
-    phystd = [0.05], priorsNNw = (0.0, 2.0),
-    param = [], nchains = 1, autodiff = false,
-    Kernel = HMC,
-    Adaptorkwargs = (Adaptor = StanHMCAdaptor,
-        Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
-    Integratorkwargs = (Integrator = Leapfrog,),
-    MCMCkwargs = (n_leapfrog = 30,),
-    progress = false, verbose = false)
+        strategy = GridTraining, dataset = [nothing],
+        init_params = nothing, draw_samples = 1000,
+        physdt = 1 / 20.0, l2std = [0.05],
+        phystd = [0.05], priorsNNw = (0.0, 2.0),
+        param = [], nchains = 1, autodiff = false,
+        Kernel = HMC,
+        Adaptorkwargs = (Adaptor = StanHMCAdaptor,
+            Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
+        Integratorkwargs = (Integrator = Leapfrog,),
+        MCMCkwargs = (n_leapfrog = 30,),
+        progress = false, verbose = false)
 
     # NN parameter prior mean and variance(PriorsNN must be a tuple)
     if isinplace(prob)

From f8427a3aca489008be90cff14435bc87d755a8f4 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Sun, 21 Jan 2024 00:07:36 +0530
Subject: [PATCH 008/107] update advancedHMC_MCMC.jl

---
 src/advancedHMC_MCMC.jl | 72 ++++++++++++++++++++---------------------
 1 file changed, 36 insertions(+), 36 deletions(-)

diff --git a/src/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl
index 6fee4a818e..1a2c47de0d 100644
--- a/src/advancedHMC_MCMC.jl
+++ b/src/advancedHMC_MCMC.jl
@@ -18,9 +18,9 @@ mutable struct LogTargetDensity{C, S, ST <: AbstractTrainingStrategy, I,
     init_params::I
 
     function LogTargetDensity(dim, prob, chain::Optimisers.Restructure, st, strategy,
-            dataset,
-            priors, phystd, l2std, autodiff, physdt, extraparams,
-            init_params::AbstractVector)
+        dataset,
+        priors, phystd, l2std, autodiff, physdt, extraparams,
+        init_params::AbstractVector)
         new{
             typeof(chain),
             Nothing,
@@ -42,9 +42,9 @@ mutable struct LogTargetDensity{C, S, ST <: AbstractTrainingStrategy, I,
             init_params)
     end
     function LogTargetDensity(dim, prob, chain::Lux.AbstractExplicitLayer, st, strategy,
-            dataset,
-            priors, phystd, l2std, autodiff, physdt, extraparams,
-            init_params::NamedTuple)
+        dataset,
+        priors, phystd, l2std, autodiff, physdt, extraparams,
+        init_params::NamedTuple)
         new{
             typeof(chain),
             typeof(st),
@@ -138,8 +138,8 @@ function physloglikelihood(Tar::LogTargetDensity, θ)
 end
 
 function getlogpdf(strategy::GridTraining, Tar::LogTargetDensity, f, autodiff::Bool,
-        tspan,
-        ode_params, θ)
+    tspan,
+    ode_params, θ)
     if Tar.dataset isa Vector{Nothing}
         t = collect(eltype(strategy.dx), tspan[1]:(strategy.dx):tspan[2])
     else
@@ -152,12 +152,12 @@ function getlogpdf(strategy::GridTraining, Tar::LogTargetDensity, f, autodiff::B
 end
 
 function getlogpdf(strategy::StochasticTraining,
-        Tar::LogTargetDensity,
-        f,
-        autodiff::Bool,
-        tspan,
-        ode_params,
-        θ)
+    Tar::LogTargetDensity,
+    f,
+    autodiff::Bool,
+    tspan,
+    ode_params,
+    θ)
     if Tar.dataset isa Vector{Nothing}
         t = [(tspan[2] - tspan[1]) * rand() + tspan[1] for i in 1:(strategy.points)]
     else
@@ -170,9 +170,9 @@ function getlogpdf(strategy::StochasticTraining,
 end
 
 function getlogpdf(strategy::QuadratureTraining, Tar::LogTargetDensity, f,
-        autodiff::Bool,
-        tspan,
-        ode_params, θ)
+    autodiff::Bool,
+    tspan,
+    ode_params, θ)
     function integrand(t::Number, θ)
         innerdiff(Tar, f, autodiff, [t], θ, ode_params)
     end
@@ -182,9 +182,9 @@ function getlogpdf(strategy::QuadratureTraining, Tar::LogTargetDensity, f,
 end
 
 function getlogpdf(strategy::WeightedIntervalTraining, Tar::LogTargetDensity, f,
-        autodiff::Bool,
-        tspan,
-        ode_params, θ)
+    autodiff::Bool,
+    tspan,
+    ode_params, θ)
     minT = tspan[1]
     maxT = tspan[2]
 
@@ -217,7 +217,7 @@ end
 MvNormal likelihood at each `ti` in time `t` for ODE collocation residue with NN with parameters θ 
 """
 function innerdiff(Tar::LogTargetDensity, f, autodiff::Bool, t::AbstractVector, θ,
-        ode_params)
+    ode_params)
 
     # Tar used for phi and LogTargetDensity object attributes access
     out = Tar(t, θ[1:(length(θ) - Tar.extraparams)])
@@ -300,12 +300,12 @@ end
 nn OUTPUT AT t,θ ~ phi(t,θ)
 """
 function (f::LogTargetDensity{C, S})(t::AbstractVector,
-        θ) where {C <: Optimisers.Restructure, S}
+    θ) where {C <: Optimisers.Restructure, S}
     f.prob.u0 .+ (t' .- f.prob.tspan[1]) .* f.chain(θ)(adapt(parameterless_type(θ), t'))
 end
 
 function (f::LogTargetDensity{C, S})(t::AbstractVector,
-        θ) where {C <: Lux.AbstractExplicitLayer, S}
+    θ) where {C <: Lux.AbstractExplicitLayer, S}
     θ = vector_to_parameters(θ, f.init_params)
     y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), t'), θ, f.st)
     ChainRulesCore.@ignore_derivatives f.st = st
@@ -313,13 +313,13 @@ function (f::LogTargetDensity{C, S})(t::AbstractVector,
 end
 
 function (f::LogTargetDensity{C, S})(t::Number,
-        θ) where {C <: Optimisers.Restructure, S}
+    θ) where {C <: Optimisers.Restructure, S}
     #  must handle paired odes hence u0 broadcasted
     f.prob.u0 .+ (t - f.prob.tspan[1]) * f.chain(θ)(adapt(parameterless_type(θ), [t]))
 end
 
 function (f::LogTargetDensity{C, S})(t::Number,
-        θ) where {C <: Lux.AbstractExplicitLayer, S}
+    θ) where {C <: Lux.AbstractExplicitLayer, S}
     θ = vector_to_parameters(θ, f.init_params)
     y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), [t]), θ, f.st)
     ChainRulesCore.@ignore_derivatives f.st = st
@@ -467,17 +467,17 @@ Incase you are only solving the Equations for solution, do not provide dataset
 priors: pdf for W,b + pdf for ODE params
 """
 function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain;
-        strategy = GridTraining, dataset = [nothing],
-        init_params = nothing, draw_samples = 1000,
-        physdt = 1 / 20.0, l2std = [0.05],
-        phystd = [0.05], priorsNNw = (0.0, 2.0),
-        param = [], nchains = 1, autodiff = false,
-        Kernel = HMC,
-        Adaptorkwargs = (Adaptor = StanHMCAdaptor,
-            Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
-        Integratorkwargs = (Integrator = Leapfrog,),
-        MCMCkwargs = (n_leapfrog = 30,),
-        progress = false, verbose = false)
+    strategy = GridTraining, dataset = [nothing],
+    init_params = nothing, draw_samples = 1000,
+    physdt = 1 / 20.0, l2std = [0.05],
+    phystd = [0.05], priorsNNw = (0.0, 2.0),
+    param = [], nchains = 1, autodiff = false,
+    Kernel = HMC,
+    Adaptorkwargs = (Adaptor = StanHMCAdaptor,
+        Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
+    Integratorkwargs = (Integrator = Leapfrog,),
+    MCMCkwargs = (n_leapfrog = 30,),
+    progress = false, verbose = false)
 
     # NN parameter prior mean and variance(PriorsNN must be a tuple)
     if isinplace(prob)

From 237fb454ac763cd4853bf35a2fc38520643eae52 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Sat, 3 Feb 2024 17:00:05 +0530
Subject: [PATCH 009/107] most of logic done

---
 Project.toml                  |   1 +
 src/NeuralPDE.jl              |   1 +
 src/PDE_BPINN.jl              | 177 +++++++++++++++++++++++++++-------
 test/BPINN_PDEinvsol_tests.jl | 163 +++++++++++++++++++++++++++++--
 4 files changed, 299 insertions(+), 43 deletions(-)

diff --git a/Project.toml b/Project.toml
index 1013977bad..d83eaa368d 100644
--- a/Project.toml
+++ b/Project.toml
@@ -10,6 +10,7 @@ ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
 ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66"
 Cubature = "667455a9-e2ce-5579-9412-b964f529a492"
+DataInterpolations = "82cc6244-b520-54b8-b5a6-8a565e85f1d0"
 DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e"
 DiffEqNoiseProcess = "77a26b50-5914-5dd7-bc55-306e6241c503"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
diff --git a/src/NeuralPDE.jl b/src/NeuralPDE.jl
index 931fb24a5c..80d9abdc62 100644
--- a/src/NeuralPDE.jl
+++ b/src/NeuralPDE.jl
@@ -24,6 +24,7 @@ using Symbolics: wrap, unwrap, arguments, operation
 using SymbolicUtils
 using AdvancedHMC, LogDensityProblems, LinearAlgebra, Functors, MCMCChains
 using MonteCarloMeasurements
+using DataInterpolations: LinearInterpolation
 
 import ModelingToolkit: value, nameof, toexpr, build_expr, expand_derivatives
 import DomainSets: Domain, ClosedInterval
diff --git a/src/PDE_BPINN.jl b/src/PDE_BPINN.jl
index b63741e9b6..d5792d5e35 100644
--- a/src/PDE_BPINN.jl
+++ b/src/PDE_BPINN.jl
@@ -3,7 +3,7 @@ mutable struct PDELogTargetDensity{
     D <: Union{Nothing, Vector{<:Matrix{<:Real}}},
     P <: Vector{<:Distribution},
     I,
-    F,
+    F, FF,
     PH,
 }
     dim::Int64
@@ -15,17 +15,18 @@ mutable struct PDELogTargetDensity{
     extraparams::Int
     init_params::I
     full_loglikelihood::F
+    L2_loss2::FF
     Φ::PH
 
     function PDELogTargetDensity(dim, strategy, dataset,
             priors, allstd, names, extraparams,
-            init_params::AbstractVector, full_loglikelihood, Φ)
+            init_params::AbstractVector, full_loglikelihood, L2_loss2, Φ)
         new{
             typeof(strategy),
             typeof(dataset),
             typeof(priors),
             typeof(init_params),
-            typeof(full_loglikelihood),
+            typeof(full_loglikelihood), typeof(L2_loss2),
             typeof(Φ),
         }(dim,
             strategy,
@@ -35,19 +36,19 @@ mutable struct PDELogTargetDensity{
             names,
             extraparams,
             init_params,
-            full_loglikelihood,
+            full_loglikelihood, L2_loss2,
             Φ)
     end
     function PDELogTargetDensity(dim, strategy, dataset,
             priors, allstd, names, extraparams,
             init_params::Union{NamedTuple, ComponentArrays.ComponentVector},
-            full_loglikelihood, Φ)
+            full_loglikelihood, L2_loss2, Φ)
         new{
             typeof(strategy),
             typeof(dataset),
             typeof(priors),
             typeof(init_params),
-            typeof(full_loglikelihood),
+            typeof(full_loglikelihood), typeof(L2_loss2),
             typeof(Φ),
         }(dim,
             strategy,
@@ -57,23 +58,122 @@ mutable struct PDELogTargetDensity{
             names,
             extraparams,
             init_params,
-            full_loglikelihood,
+            full_loglikelihood, L2_loss2,
             Φ)
     end
 end
 
+# dataset_pde has normal matrix format 
+# dataset_bc has format of Vector{typeof(dataset_pde )} as each bc has different domain requirements
+function get_symbols(dict_depvar_input, dataset, depvars)
+    # get datasets into splattable form
+    splat_form = [[dataset_i[:, i] for i in 1:size(dataset_i)[2]] for dataset_i in dataset]
+    # splat datasets onto Linear interpolations tables
+    interps = [LinearInterpolation(splat_i...) for splat_i in splat_form]
+    interps = Dict(depvars .=> interps)
+
+    Dict_symbol_interps = Dict(depvar => (interps[depvar], dict_depvar_input[depvar])
+                               for depvar in depvars)
+
+    tobe_subs = Dict()
+    for (a, b) in dict_depvar_input
+        tobe_subs[a] = eval(:($a($(b...))))
+    end
+
+    to_subs = Dict()
+    for (a, b) in Dict_symbol_interps
+        b1, b2 = b
+        to_subs[a] = eval(:($b1($(b2...))))
+    end
+    return to_subs, tobe_subs
+end
+
+function recur_expression(exp, Dict_differentials)
+    for in_exp in exp.args
+        if !(in_exp isa Expr)
+            # skip +,== symbols, characters etc
+            continue
+
+        elseif in_exp.args[1] isa ModelingToolkit.Differential
+            # first symbol of differential term
+            # Dict_differentials for masking differential terms
+            # and resubstituting differentials in equations after putting in interpolations
+            Dict_differentials[eval(in_exp)] = Symbol("diff_$(length(Dict_differentials)+1)")
+            return
+
+        else
+            recur_expression(in_exp, Dict_differentials)
+        end
+    end
+end
+
+# get datafree loss functions for new loss type
+# need to call merge_strategy_with_loss_function() variant after this
+function merge_dataset_with_loss_function(pinnrep::NeuralPDE.PINNRepresentation,
+        dataset,
+        datafree_pde_loss_function,
+        datafree_bc_loss_function)
+    @unpack domains, eqs, bcs, dict_indvars, dict_depvars, flat_init_params = pinnrep
+
+    eltypeθ = eltype(pinnrep.flat_init_params)
+
+    train_sets = [[dataset[i][:, 2] for i in eachindex(dataset)], [[0;;], [0;;], [0;;]]]
+
+    # the points in the domain and on the boundary
+    pde_train_sets, bcs_train_sets = train_sets
+    pde_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)),
+        pde_train_sets)
+    bcs_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)),
+        bcs_train_sets)
+    pde_loss_functions = [get_loss_function(_loss, _set, eltypeθ)
+                          for (_loss, _set) in zip(datafree_pde_loss_function,
+        pde_train_sets)]
+
+    bc_loss_functions = [get_loss_function(_loss, _set, eltypeθ)
+                         for (_loss, _set) in zip(datafree_bc_loss_function, bcs_train_sets)]
+
+    pde_loss_functions, bc_loss_functions
+end
+
+function get_loss_function(loss_function, train_set, eltypeθ; τ = nothing)
+    loss = (θ) -> mean(abs2, loss_function(train_set, θ))
+end
+
+# for bc case, [bc]/bc eqs must be passed along with dataset_bc[i]
+# and final loss for bc must be together in a vector(bcs has seperate type of dataset_bc)
+# eqs is vector of pde eqs and dataset here is dataset_pde
+# normally you get vector of losses
+function get_loss_2(pinnrep, dataset, eqs)
+    depvars = pinnrep.depvars # order is same as dataset and interps
+    dict_depvar_input = pinnrep.dict_depvar_input
+
+    to_subs, tobe_subs = get_symbols(dict_depvar_input, dataset, depvars)
+    interp_subs_dict = Dict(tobe_subs[depvar] => to_subs[depvar] for depvar in depvars)
+
+    Dict_differentials = Dict()
+    exp = toexpr(eqs)
+    void_value = [recur_expression(exp_i, Dict_differentials) for exp_i in exp]
+    # Dict_differentials is now filled with Differential operator => diff_i key-value pairs
+
+    # masking operation
+    a = substitute.(eqs, Ref(Dict_differentials))
+    b = substitute.(a, Ref(interp_subs_dict))
+    # reverse dict for re-substituing values of Differential(t)(u(t)) etc
+    rev_Dict_differentials = Dict(value => key for (key, value) in Dict_differentials)
+    eqs = substitute.(b, Ref(rev_Dict_differentials))
+    # get losses
+    loss_functions = [NeuralPDE.build_loss_function(pinnrep,
+        eqs[i],
+        pinnrep.pde_indvars[i]) for i in eachindex(eqs)]
+end
+
 function LogDensityProblems.logdensity(Tar::PDELogTargetDensity, θ)
     # for parameter estimation neccesarry to use multioutput case
     return Tar.full_loglikelihood(setparameters(Tar, θ),
-               Tar.allstd) + priorlogpdf(Tar, θ) + L2LossData(Tar, θ)
-    # + L2loss2(Tar, θ)
+               Tar.allstd) + priorlogpdf(Tar, θ) + Tar.L2_loss2(setparameters(Tar, θ),
+               Tar.allstd)
 end
 
-# function L2loss2(Tar::PDELogTargetDensity, θ)
-#     return Tar.full_loglikelihood(setparameters(Tar, θ),
-#         Tar.allstd)
-# end
-
 function setparameters(Tar::PDELogTargetDensity, θ)
     names = Tar.names
     ps_new = θ[1:(end - Tar.extraparams)]
@@ -131,6 +231,8 @@ function L2LossData(Tar::PDELogTargetDensity, θ)
 
     # dataset of form Vector[matrix_x, matrix_y, matrix_z]
     # matrix_i is of form [i,indvar1,indvar2,..] (needed in case if heterogenous domains)
+    # note that indvar1,indvar2.. cols can be different values for different depvar matrices
+    # order follows pinnrep.depvars orders of variables (order of declaration in @variables macro)
 
     # Phi is the trial solution for each NN in chain array
     # Creating logpdf( MvNormal(Phi(t,θ),std), dataset[i] )
@@ -188,27 +290,6 @@ function priorlogpdf(Tar::PDELogTargetDensity, θ)
     end
 end
 
-function integratorchoice(Integratorkwargs, initial_ϵ)
-    Integrator = Integratorkwargs[:Integrator]
-    if Integrator == JitteredLeapfrog
-        jitter_rate = Integratorkwargs[:jitter_rate]
-        Integrator(initial_ϵ, jitter_rate)
-    elseif Integrator == TemperedLeapfrog
-        tempering_rate = Integratorkwargs[:tempering_rate]
-        Integrator(initial_ϵ, tempering_rate)
-    else
-        Integrator(initial_ϵ)
-    end
-end
-
-function adaptorchoice(Adaptor, mma, ssa)
-    if Adaptor != AdvancedHMC.NoAdaptation()
-        Adaptor(mma, ssa)
-    else
-        AdvancedHMC.NoAdaptation()
-    end
-end
-
 function inference(samples, pinnrep, saveats, numensemble, ℓπ)
     domains = pinnrep.domains
     phi = pinnrep.phi
@@ -353,6 +434,30 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
     pinnrep = symbolic_discretize(pde_system, discretization)
     dataset_pde, dataset_bc = discretization.dataset
 
+    eqs = pinnrep.eqs
+    yuh1 = get_loss_2(pinnrep, dataset_pde, eqs)
+    eqs = pinnrep.bcs
+    yuh2 = get_loss_2(pinnrep, dataset_bc, eqs)
+
+    pde_loss_functions, bc_loss_functions = merge_dataset_with_loss_function(pinnrep,
+        dataset,
+        yuh1,
+        yuh2)
+
+    function L2_loss2(θ, allstd)
+        stdpdes, stdbcs, stdextra = allstd
+        pde_loglikelihoods = [logpdf(Normal(0, stdpdes[i]), pde_loss_function(θ))
+                              for (i, pde_loss_function) in enumerate(pde_loss_functions)]
+
+        bc_loglikelihoods = [logpdf(Normal(0, stdbcs[j]), bc_loss_function(θ))
+                             for (j, bc_loss_function) in enumerate(bc_loss_functions)]
+        println("pde_loglikelihoods : ", pde_loglikelihoods)
+        println("bc_loglikelihoods : ", bc_loglikelihoods)
+        return sum(sum(pde_loglikelihoods) + sum(bc_loglikelihoods))
+    end
+
+    println(L2_loss2)
+    # WIP split dataset to respective equations
     if ((dataset_bc isa Nothing) && (dataset_pde isa Nothing))
         dataset = nothing
     elseif dataset_bc isa Nothing
@@ -441,7 +546,7 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
         names,
         ninv,
         initial_nnθ,
-        full_weighted_loglikelihood,
+        full_weighted_loglikelihood, L2_loss2,
         Φ)
 
     Adaptor, Metric, targetacceptancerate = Adaptorkwargs[:Adaptor],
diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index 3521c8c913..4876328413 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -3,7 +3,7 @@ import ModelingToolkit: Interval, infimum, supremum
 using ForwardDiff, Distributions, OrdinaryDiffEq
 using Flux, AdvancedHMC, Statistics, Random, Functors
 using NeuralPDE, MonteCarloMeasurements
-using ComponentArrays
+using ComponentArrays, ModelingToolkit
 
 Random.seed!(100)
 
@@ -32,9 +32,9 @@ initl, st = Lux.setup(Random.default_rng(), chainl)
 
 analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
 timepoints = collect(0.0:(1 / 100.0):2.0)
-u = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
-u = u .+ (u .* 0.2) .* randn(size(u))
-dataset = [hcat(u, timepoints)]
+u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
+u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
+dataset = [hcat(u1, timepoints)]
 
 # plot(dataset[1][:, 2], dataset[1][:, 1])
 # plot!(timepoints, u)
@@ -51,7 +51,7 @@ ahmc_bayesian_pinn_pde(pde_system,
     phystd = [0.01], l2std = [0.01],
     priorsNNw = (0.0, 1.0),
     saveats = [1 / 50.0],
-    param = [LogNormal(6.0, 0.5)])
+    param = [LogNormal(6.0, 0.5)], progress = true)
 
 discretization = NeuralPDE.BayesianPINN([chainl],
     QuasiRandomTraining(200),
@@ -175,7 +175,7 @@ discretization = NeuralPDE.BayesianPINN(chain, NeuralPDE.GridTraining([0.01]);
 
 sol1 = ahmc_bayesian_pinn_pde(pde_system,
     discretization;
-    draw_samples = 50,
+    draw_samples = 20,
     bcstd = [0.3, 0.3, 0.3],
     phystd = [0.1, 0.1, 0.1],
     l2std = [1, 1, 1],
@@ -192,4 +192,153 @@ p_ = sol1.estimated_de_params[1]
 # plot!(sol1.timepoints[3]', pmean(sol1.ensemblesol[3]))
 
 @test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
-# @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
\ No newline at end of file
+# @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
+
+# # NEW LOSS FUNCTION CODE
+# pinnrep = symbolic_discretize(pde_system, discretization)
+
+# # general equation with diff
+# # now 1> substute u(t), phi(t) values from dataset and get multiple equations
+# # phi[i] must be in numeric_derivative() form
+# # derivative(phi, u, [x, y], εs, order, θ) - use parse_equations() and interp object to create loss function
+# # this function must take interp objects(train sets)
+# # dataset - get u(t), t from dataset interpolations object
+# # make lhs-rhs loss
+# # sum losses
+
+# using DataInterpolations
+
+# # dataset_pde has normal matrix format 
+# # dataset_bc has format of Vector{typeof(dataset_pde )} as each bc has different domain requirements
+# function get_symbols(dict_depvar_input, dataset, depvars)
+#     # get datasets into splattable form
+#     splat_form = [[dataset_i[:, i] for i in 1:size(dataset_i)[2]] for dataset_i in dataset]
+#     # splat datasets onto Linear interpolations tables
+#     interps = [LinearInterpolation(splat_i...) for splat_i in splat_form]
+#     interps = Dict(depvars .=> interps)
+
+#     Dict_symbol_interps = Dict(depvar => (interps[depvar], dict_depvar_input[depvar])
+#                                for depvar in depvars)
+
+#     tobe_subs = Dict()
+#     for (a, b) in dict_depvar_input
+#         tobe_subs[a] = eval(:($a($(b...))))
+#     end
+
+#     to_subs = Dict()
+#     for (a, b) in Dict_symbol_interps
+#         b1, b2 = b
+#         to_subs[a] = eval(:($b1($(b2...))))
+#     end
+#     return to_subs, tobe_subs
+# end
+
+# function recur_expression(exp, Dict_differentials)
+#     for in_exp in exp.args
+#         if !(in_exp isa Expr)
+#             # skip +,== symbols, characters etc
+#             continue
+
+#         elseif in_exp.args[1] isa ModelingToolkit.Differential
+#             # first symbol of differential term
+#             # Dict_differentials for masking differential terms
+#             # and resubstituting differentials in equations after putting in interpolations
+#             Dict_differentials[eval(in_exp)] = Symbol("diff_$(length(Dict_differentials)+1)")
+#             return
+
+#         else
+#             recur_expression(in_exp, Dict_differentials)
+#         end
+#     end
+# end
+
+# # get datafree loss functions for new loss type
+# # need to call merge_strategy_with_loss_function() variant after this
+# function merge_dataset_with_loss_function(pinnrep::NeuralPDE.PINNRepresentation,
+#         dataset,
+#         datafree_pde_loss_function,
+#         datafree_bc_loss_function)
+#     @unpack domains, eqs, bcs, dict_indvars, dict_depvars, flat_init_params = pinnrep
+
+#     eltypeθ = eltype(pinnrep.flat_init_params)
+
+#     train_sets = [[dataset[i][:, 2] for i in eachindex(dataset)], [[0;;], [0;;], [0;;]]]
+
+#     # the points in the domain and on the boundary
+#     pde_train_sets, bcs_train_sets = train_sets
+#     # pde_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)),
+#     #     pde_train_sets)
+#     # bcs_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)),
+#     #     bcs_train_sets)
+#     pde_loss_functions = [get_loss_function(_loss, _set, eltypeθ)
+#                           for (_loss, _set) in zip(datafree_pde_loss_function,
+#         pde_train_sets)]
+
+#     bc_loss_functions = [get_loss_function(_loss, _set, eltypeθ)
+#                          for (_loss, _set) in zip(datafree_bc_loss_function, bcs_train_sets)]
+
+#     pde_loss_functions, bc_loss_functions
+# end
+
+# function get_loss_function(loss_function, train_set, eltypeθ; τ = nothing)
+#     loss = (θ) -> mean(abs2, loss_function(train_set, θ))
+# end
+
+# # for bc case, [bc]/bc eqs must be passed along with dataset_bc[i]
+# # and final loss for bc must be together in a vector(bcs has seperate type of dataset_bc)
+# # eqs is vector of pde eqs and dataset here is dataset_pde
+# # normally you get vector of losses
+# function get_loss_2(pinnrep, dataset, eqs)
+#     depvars = pinnrep.depvars # order is same as dataset and interps
+#     dict_depvar_input = pinnrep.dict_depvar_input
+
+#     to_subs, tobe_subs = get_symbols(dict_depvar_input, dataset, depvars)
+#     interp_subs_dict = Dict(tobe_subs[depvar] => to_subs[depvar] for depvar in depvars)
+
+#     Dict_differentials = Dict()
+#     exp = toexpr(eqs)
+#     void_value = [recur_expression(exp_i, Dict_differentials) for exp_i in exp]
+#     # Dict_differentials is now filled with Differential operator => diff_i key-value pairs
+
+#     # masking operation
+#     a = substitute.(eqs, Ref(Dict_differentials))
+#     b = substitute.(a, Ref(interp_subs_dict))
+#     # reverse dict for re-substituing values of Differential(t)(u(t)) etc
+#     rev_Dict_differentials = Dict(value => key for (key, value) in Dict_differentials)
+#     eqs = substitute.(b, Ref(rev_Dict_differentials))
+#     # get losses
+#     loss_functions = [NeuralPDE.build_loss_function(pinnrep,
+#         eqs[i],
+#         pinnrep.pde_indvars[i]) for i in eachindex(eqs)]
+# end
+
+# eqs = pde_system.eqs
+# yuh1 = get_loss_2(pinnrep, dataset, eqs)
+# eqs = pinnrep.bcs
+# yuh2 = get_loss_2(pinnrep, dataset, eqs)
+
+# pde_loss_functions, bc_loss_functions = merge_dataset_with_loss_function(pinnrep,
+#     dataset,
+#     yuh1,
+#     yuh2)
+
+# pde_loss_functions()
+# # logic for recursion formula to parse differentials
+# # # this below has the whole differential term
+# # toexpr(pde_system.eqs[1]).args[2].args[3].args[3] isa ModelingToolkit.Differential
+# # toexpr(pde_system.eqs[1]).args[2].args[3].args[3]
+# # # .args[1] isa ModelingToolkit.Differential
+
+# # logic for interpolation and indvars splatting to get Equation parsing terms
+# # splat_form = [[dataset_i[:, i] for i in 1:size(dataset_i)[2]] for dataset_i in dataset]
+# # # splat datasets onto Linear interpolations tables
+# # interps = [LinearInterpolation(splat_i...) for splat_i in splat_form]
+# # interps = Dict(depvars .=> interps)
+# # get datasets into splattable form
+# # splat_form = [[dataset_i[:, i] for i in 1:size(dataset_i)[2]] for dataset_i in dataset]
+# # # splat datasets onto Linear interpolations tables
+# # yu = [LinearInterpolation(splat_i...) for splat_i in splat_form]
+# # Symbol(:($(yu[1])))
+
+# # logic to contrauct dict to feed for masking
+# # Dict(interps[depvar] => dict_depvar_input[depvar] for depvar in depvars)
\ No newline at end of file

From f1e031559f5426ce7a75453369d44ac13491a3d3 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Sun, 4 Feb 2024 01:43:13 +0530
Subject: [PATCH 010/107] removed duplicate methods

---
 src/PDE_BPINN.jl        | 21 +++++++++++++++++++++
 src/advancedHMC_MCMC.jl | 21 ---------------------
 2 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/src/PDE_BPINN.jl b/src/PDE_BPINN.jl
index d5792d5e35..2e0401d843 100644
--- a/src/PDE_BPINN.jl
+++ b/src/PDE_BPINN.jl
@@ -376,6 +376,27 @@ function inference(samples, pinnrep, saveats, numensemble, ℓπ)
     end
 end
 
+function integratorchoice(Integratorkwargs, initial_ϵ)
+    Integrator = Integratorkwargs[:Integrator]
+    if Integrator == JitteredLeapfrog
+        jitter_rate = Integratorkwargs[:jitter_rate]
+        Integrator(initial_ϵ, jitter_rate)
+    elseif Integrator == TemperedLeapfrog
+        tempering_rate = Integratorkwargs[:tempering_rate]
+        Integrator(initial_ϵ, tempering_rate)
+    else
+        Integrator(initial_ϵ)
+    end
+end
+
+function adaptorchoice(Adaptor, mma, ssa)
+    if Adaptor != AdvancedHMC.NoAdaptation()
+        Adaptor(mma, ssa)
+    else
+        AdvancedHMC.NoAdaptation()
+    end
+end
+
 """
 ```julia
 ahmc_bayesian_pinn_pde(pde_system, discretization;
diff --git a/src/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl
index 1a2c47de0d..1efbadd4bd 100644
--- a/src/advancedHMC_MCMC.jl
+++ b/src/advancedHMC_MCMC.jl
@@ -351,27 +351,6 @@ function kernelchoice(Kernel, MCMCkwargs)
     end
 end
 
-function integratorchoice(Integratorkwargs, initial_ϵ)
-    Integrator = Integratorkwargs[:Integrator]
-    if Integrator == JitteredLeapfrog
-        jitter_rate = Integratorkwargs[:jitter_rate]
-        Integrator(initial_ϵ, jitter_rate)
-    elseif Integrator == TemperedLeapfrog
-        tempering_rate = Integratorkwargs[:tempering_rate]
-        Integrator(initial_ϵ, tempering_rate)
-    else
-        Integrator(initial_ϵ)
-    end
-end
-
-function adaptorchoice(Adaptor, mma, ssa)
-    if Adaptor != AdvancedHMC.NoAdaptation()
-        Adaptor(mma, ssa)
-    else
-        AdvancedHMC.NoAdaptation()
-    end
-end
-
 """
 ```julia
 ahmc_bayesian_pinn_ode(prob, chain; strategy = GridTraining,

From 4c88dd4ec18778f25caba8c6053c6f62d4f9223c Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Sun, 4 Feb 2024 02:02:02 +0530
Subject: [PATCH 011/107] update BPINN_PDE_tests.jl

---
 test/BPINN_PDE_tests.jl | 151 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 151 insertions(+)

diff --git a/test/BPINN_PDE_tests.jl b/test/BPINN_PDE_tests.jl
index 6dd3637f5a..7a5e47b83d 100644
--- a/test/BPINN_PDE_tests.jl
+++ b/test/BPINN_PDE_tests.jl
@@ -206,3 +206,154 @@ end
     u_predict = pmean(sol1.ensemblesol[1])
     @test u_predict≈u_real atol=0.8
 end
+
+
+
+# # NEW LOSS FUNCTION CODE
+pinnrep = symbolic_discretize(pde_system, discretization)
+
+# general equation with diff
+# now 1> substute u(t), phi(t) values from dataset and get multiple equations
+# phi[i] must be in numeric_derivative() form
+# derivative(phi, u, [x, y], εs, order, θ) - use parse_equations() and interp object to create loss function
+# this function must take interp objects(train sets)
+# dataset - get u(t), t from dataset interpolations object
+# make lhs-rhs loss
+# sum losses
+
+using DataInterpolations
+
+# dataset_pde has normal matrix format 
+# dataset_bc has format of Vector{typeof(dataset_pde )} as each bc has different domain requirements
+function get_symbols(dict_depvar_input, dataset, depvars)
+    # get datasets into splattable form
+    splat_form = [[dataset_i[:, i] for i in 1:size(dataset_i)[2]] for dataset_i in dataset]
+    # splat datasets onto Linear interpolations tables
+    interps = [LinearInterpolation(splat_i...) for splat_i in splat_form]
+    interps = Dict(depvars .=> interps)
+
+    Dict_symbol_interps = Dict(depvar => (interps[depvar], dict_depvar_input[depvar])
+                               for depvar in depvars)
+
+    tobe_subs = Dict()
+    for (a, b) in dict_depvar_input
+        tobe_subs[a] = eval(:($a($(b...))))
+    end
+
+    to_subs = Dict()
+    for (a, b) in Dict_symbol_interps
+        b1, b2 = b
+        to_subs[a] = eval(:($b1($(b2...))))
+    end
+    return to_subs, tobe_subs
+end
+
+function recur_expression(exp, Dict_differentials)
+    for in_exp in exp.args
+        if !(in_exp isa Expr)
+            # skip +,== symbols, characters etc
+            continue
+
+        elseif in_exp.args[1] isa ModelingToolkit.Differential
+            # first symbol of differential term
+            # Dict_differentials for masking differential terms
+            # and resubstituting differentials in equations after putting in interpolations
+            Dict_differentials[eval(in_exp)] = Symbol("diff_$(length(Dict_differentials)+1)")
+            return
+
+        else
+            recur_expression(in_exp, Dict_differentials)
+        end
+    end
+end
+
+# get datafree loss functions for new loss type
+# need to call merge_strategy_with_loss_function() variant after this
+function merge_dataset_with_loss_function(pinnrep::NeuralPDE.PINNRepresentation,
+        dataset,
+        datafree_pde_loss_function,
+        datafree_bc_loss_function)
+    @unpack domains, eqs, bcs, dict_indvars, dict_depvars, flat_init_params = pinnrep
+
+    eltypeθ = eltype(pinnrep.flat_init_params)
+
+    train_sets = [[dataset[i][:, 2] for i in eachindex(dataset)], [[0;;], [0;;], [0;;]]]
+
+    # the points in the domain and on the boundary
+    pde_train_sets, bcs_train_sets = train_sets
+    # pde_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)),
+    #     pde_train_sets)
+    # bcs_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)),
+    #     bcs_train_sets)
+    pde_loss_functions = [get_loss_function(_loss, _set, eltypeθ)
+                          for (_loss, _set) in zip(datafree_pde_loss_function,
+        pde_train_sets)]
+
+    bc_loss_functions = [get_loss_function(_loss, _set, eltypeθ)
+                         for (_loss, _set) in zip(datafree_bc_loss_function, bcs_train_sets)]
+
+    pde_loss_functions, bc_loss_functions
+end
+
+function get_loss_function(loss_function, train_set, eltypeθ; τ = nothing)
+    loss = (θ) -> mean(abs2, loss_function(train_set, θ))
+end
+
+# for bc case, [bc]/bc eqs must be passed along with dataset_bc[i]
+# and final loss for bc must be together in a vector(bcs has seperate type of dataset_bc)
+# eqs is vector of pde eqs and dataset here is dataset_pde
+# normally you get vector of losses
+function get_loss_2(pinnrep, dataset, eqs)
+    depvars = pinnrep.depvars # order is same as dataset and interps
+    dict_depvar_input = pinnrep.dict_depvar_input
+
+    to_subs, tobe_subs = get_symbols(dict_depvar_input, dataset, depvars)
+    interp_subs_dict = Dict(tobe_subs[depvar] => to_subs[depvar] for depvar in depvars)
+
+    Dict_differentials = Dict()
+    exp = toexpr(eqs)
+    void_value = [recur_expression(exp_i, Dict_differentials) for exp_i in exp]
+    # Dict_differentials is now filled with Differential operator => diff_i key-value pairs
+
+    # masking operation
+    a = substitute.(eqs, Ref(Dict_differentials))
+    b = substitute.(a, Ref(interp_subs_dict))
+    # reverse dict for re-substituing values of Differential(t)(u(t)) etc
+    rev_Dict_differentials = Dict(value => key for (key, value) in Dict_differentials)
+    eqs = substitute.(b, Ref(rev_Dict_differentials))
+    # get losses
+    loss_functions = [NeuralPDE.build_loss_function(pinnrep,
+        eqs[i],
+        pinnrep.pde_indvars[i]) for i in eachindex(eqs)]
+end
+
+eqs = pde_system.eqs
+yuh1 = get_loss_2(pinnrep, dataset, eqs)
+eqs = pinnrep.bcs
+yuh2 = get_loss_2(pinnrep, dataset, eqs)
+
+pde_loss_functions, bc_loss_functions = merge_dataset_with_loss_function(pinnrep,
+    dataset,
+    yuh1,
+    yuh2)
+
+pde_loss_functions()
+# logic for recursion formula to parse differentials
+# # this below has the whole differential term
+# toexpr(pde_system.eqs[1]).args[2].args[3].args[3] isa ModelingToolkit.Differential
+# toexpr(pde_system.eqs[1]).args[2].args[3].args[3]
+# # .args[1] isa ModelingToolkit.Differential
+
+# logic for interpolation and indvars splatting to get Equation parsing terms
+# splat_form = [[dataset_i[:, i] for i in 1:size(dataset_i)[2]] for dataset_i in dataset]
+# # splat datasets onto Linear interpolations tables
+# interps = [LinearInterpolation(splat_i...) for splat_i in splat_form]
+# interps = Dict(depvars .=> interps)
+# get datasets into splattable form
+# splat_form = [[dataset_i[:, i] for i in 1:size(dataset_i)[2]] for dataset_i in dataset]
+# # splat datasets onto Linear interpolations tables
+# yu = [LinearInterpolation(splat_i...) for splat_i in splat_form]
+# Symbol(:($(yu[1])))
+
+# logic to contrauct dict to feed for masking
+# Dict(interps[depvar] => dict_depvar_input[depvar] for depvar in depvars)
\ No newline at end of file

From f7836fdde4f57fbeb18ebe46dc89f4995dbbb2b0 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Sun, 4 Feb 2024 02:03:46 +0530
Subject: [PATCH 012/107] update BPINN_PDE_tests.jl

---
 test/BPINN_PDE_tests.jl | 153 +---------------------------------------
 1 file changed, 1 insertion(+), 152 deletions(-)

diff --git a/test/BPINN_PDE_tests.jl b/test/BPINN_PDE_tests.jl
index 7a5e47b83d..cd8f6ef466 100644
--- a/test/BPINN_PDE_tests.jl
+++ b/test/BPINN_PDE_tests.jl
@@ -205,155 +205,4 @@ end
     u_real = vec([analytic_sol_func(t) for t in ts])
     u_predict = pmean(sol1.ensemblesol[1])
     @test u_predict≈u_real atol=0.8
-end
-
-
-
-# # NEW LOSS FUNCTION CODE
-pinnrep = symbolic_discretize(pde_system, discretization)
-
-# general equation with diff
-# now 1> substute u(t), phi(t) values from dataset and get multiple equations
-# phi[i] must be in numeric_derivative() form
-# derivative(phi, u, [x, y], εs, order, θ) - use parse_equations() and interp object to create loss function
-# this function must take interp objects(train sets)
-# dataset - get u(t), t from dataset interpolations object
-# make lhs-rhs loss
-# sum losses
-
-using DataInterpolations
-
-# dataset_pde has normal matrix format 
-# dataset_bc has format of Vector{typeof(dataset_pde )} as each bc has different domain requirements
-function get_symbols(dict_depvar_input, dataset, depvars)
-    # get datasets into splattable form
-    splat_form = [[dataset_i[:, i] for i in 1:size(dataset_i)[2]] for dataset_i in dataset]
-    # splat datasets onto Linear interpolations tables
-    interps = [LinearInterpolation(splat_i...) for splat_i in splat_form]
-    interps = Dict(depvars .=> interps)
-
-    Dict_symbol_interps = Dict(depvar => (interps[depvar], dict_depvar_input[depvar])
-                               for depvar in depvars)
-
-    tobe_subs = Dict()
-    for (a, b) in dict_depvar_input
-        tobe_subs[a] = eval(:($a($(b...))))
-    end
-
-    to_subs = Dict()
-    for (a, b) in Dict_symbol_interps
-        b1, b2 = b
-        to_subs[a] = eval(:($b1($(b2...))))
-    end
-    return to_subs, tobe_subs
-end
-
-function recur_expression(exp, Dict_differentials)
-    for in_exp in exp.args
-        if !(in_exp isa Expr)
-            # skip +,== symbols, characters etc
-            continue
-
-        elseif in_exp.args[1] isa ModelingToolkit.Differential
-            # first symbol of differential term
-            # Dict_differentials for masking differential terms
-            # and resubstituting differentials in equations after putting in interpolations
-            Dict_differentials[eval(in_exp)] = Symbol("diff_$(length(Dict_differentials)+1)")
-            return
-
-        else
-            recur_expression(in_exp, Dict_differentials)
-        end
-    end
-end
-
-# get datafree loss functions for new loss type
-# need to call merge_strategy_with_loss_function() variant after this
-function merge_dataset_with_loss_function(pinnrep::NeuralPDE.PINNRepresentation,
-        dataset,
-        datafree_pde_loss_function,
-        datafree_bc_loss_function)
-    @unpack domains, eqs, bcs, dict_indvars, dict_depvars, flat_init_params = pinnrep
-
-    eltypeθ = eltype(pinnrep.flat_init_params)
-
-    train_sets = [[dataset[i][:, 2] for i in eachindex(dataset)], [[0;;], [0;;], [0;;]]]
-
-    # the points in the domain and on the boundary
-    pde_train_sets, bcs_train_sets = train_sets
-    # pde_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)),
-    #     pde_train_sets)
-    # bcs_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)),
-    #     bcs_train_sets)
-    pde_loss_functions = [get_loss_function(_loss, _set, eltypeθ)
-                          for (_loss, _set) in zip(datafree_pde_loss_function,
-        pde_train_sets)]
-
-    bc_loss_functions = [get_loss_function(_loss, _set, eltypeθ)
-                         for (_loss, _set) in zip(datafree_bc_loss_function, bcs_train_sets)]
-
-    pde_loss_functions, bc_loss_functions
-end
-
-function get_loss_function(loss_function, train_set, eltypeθ; τ = nothing)
-    loss = (θ) -> mean(abs2, loss_function(train_set, θ))
-end
-
-# for bc case, [bc]/bc eqs must be passed along with dataset_bc[i]
-# and final loss for bc must be together in a vector(bcs has seperate type of dataset_bc)
-# eqs is vector of pde eqs and dataset here is dataset_pde
-# normally you get vector of losses
-function get_loss_2(pinnrep, dataset, eqs)
-    depvars = pinnrep.depvars # order is same as dataset and interps
-    dict_depvar_input = pinnrep.dict_depvar_input
-
-    to_subs, tobe_subs = get_symbols(dict_depvar_input, dataset, depvars)
-    interp_subs_dict = Dict(tobe_subs[depvar] => to_subs[depvar] for depvar in depvars)
-
-    Dict_differentials = Dict()
-    exp = toexpr(eqs)
-    void_value = [recur_expression(exp_i, Dict_differentials) for exp_i in exp]
-    # Dict_differentials is now filled with Differential operator => diff_i key-value pairs
-
-    # masking operation
-    a = substitute.(eqs, Ref(Dict_differentials))
-    b = substitute.(a, Ref(interp_subs_dict))
-    # reverse dict for re-substituing values of Differential(t)(u(t)) etc
-    rev_Dict_differentials = Dict(value => key for (key, value) in Dict_differentials)
-    eqs = substitute.(b, Ref(rev_Dict_differentials))
-    # get losses
-    loss_functions = [NeuralPDE.build_loss_function(pinnrep,
-        eqs[i],
-        pinnrep.pde_indvars[i]) for i in eachindex(eqs)]
-end
-
-eqs = pde_system.eqs
-yuh1 = get_loss_2(pinnrep, dataset, eqs)
-eqs = pinnrep.bcs
-yuh2 = get_loss_2(pinnrep, dataset, eqs)
-
-pde_loss_functions, bc_loss_functions = merge_dataset_with_loss_function(pinnrep,
-    dataset,
-    yuh1,
-    yuh2)
-
-pde_loss_functions()
-# logic for recursion formula to parse differentials
-# # this below has the whole differential term
-# toexpr(pde_system.eqs[1]).args[2].args[3].args[3] isa ModelingToolkit.Differential
-# toexpr(pde_system.eqs[1]).args[2].args[3].args[3]
-# # .args[1] isa ModelingToolkit.Differential
-
-# logic for interpolation and indvars splatting to get Equation parsing terms
-# splat_form = [[dataset_i[:, i] for i in 1:size(dataset_i)[2]] for dataset_i in dataset]
-# # splat datasets onto Linear interpolations tables
-# interps = [LinearInterpolation(splat_i...) for splat_i in splat_form]
-# interps = Dict(depvars .=> interps)
-# get datasets into splattable form
-# splat_form = [[dataset_i[:, i] for i in 1:size(dataset_i)[2]] for dataset_i in dataset]
-# # splat datasets onto Linear interpolations tables
-# yu = [LinearInterpolation(splat_i...) for splat_i in splat_form]
-# Symbol(:($(yu[1])))
-
-# logic to contrauct dict to feed for masking
-# Dict(interps[depvar] => dict_depvar_input[depvar] for depvar in depvars)
\ No newline at end of file
+end
\ No newline at end of file

From a3a0cb547dba13ec4c4f5f5f2a2a86246809bfde Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Sun, 4 Feb 2024 02:24:06 +0530
Subject: [PATCH 013/107] keeping bayesian directory files in sync with master

---
 src/{bayesian => }/BPINN_ode.jl        |  0
 src/NeuralPDE.jl                       |  6 +++---
 src/{bayesian => }/advancedHMC_MCMC.jl | 22 ----------------------
 src/{bayesian => }/collocated_estim.jl |  0
 4 files changed, 3 insertions(+), 25 deletions(-)
 rename src/{bayesian => }/BPINN_ode.jl (100%)
 rename src/{bayesian => }/advancedHMC_MCMC.jl (96%)
 rename src/{bayesian => }/collocated_estim.jl (100%)

diff --git a/src/bayesian/BPINN_ode.jl b/src/BPINN_ode.jl
similarity index 100%
rename from src/bayesian/BPINN_ode.jl
rename to src/BPINN_ode.jl
diff --git a/src/NeuralPDE.jl b/src/NeuralPDE.jl
index 66d6e9c58f..57d13d255e 100644
--- a/src/NeuralPDE.jl
+++ b/src/NeuralPDE.jl
@@ -50,10 +50,10 @@ include("dae_solve.jl")
 include("transform_inf_integral.jl")
 include("discretize.jl")
 include("neural_adapter.jl")
-include("bayesian/advancedHMC_MCMC.jl")
-include("bayesian/BPINN_ode.jl")
+include("advancedHMC_MCMC.jl")
+include("BPINN_ode.jl")
 include("PDE_BPINN.jl")
-include("bayesian/collocated_estim.jl")
+include("collocated_estim.jl")
 
 export NNODE, TerminalPDEProblem, NNPDEHan, NNPDENS, NNRODE, NNDAE,
     KolmogorovPDEProblem, NNKolmogorov, NNStopping, ParamKolmogorovPDEProblem,
diff --git a/src/bayesian/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl
similarity index 96%
rename from src/bayesian/advancedHMC_MCMC.jl
rename to src/advancedHMC_MCMC.jl
index 5e995ebfdb..ada4539b10 100644
--- a/src/bayesian/advancedHMC_MCMC.jl
+++ b/src/advancedHMC_MCMC.jl
@@ -290,25 +290,9 @@ function generate_Tar(chain::Lux.AbstractExplicitLayer, init_params::Nothing)
     return θ, chain, st
 end
 
-function generate_Tar(chain::Flux.Chain, init_params)
-    θ, re = Flux.destructure(chain)
-    return init_params, re, nothing
-end
-
-function generate_Tar(chain::Flux.Chain, init_params::Nothing)
-    θ, re = Flux.destructure(chain)
-    # find_good_stepsize,phasepoint takes only float64
-    return θ, re, nothing
-end
-
 """
 nn OUTPUT AT t,θ ~ phi(t,θ)
 """
-function (f::LogTargetDensity{C, S})(t::AbstractVector,
-    θ) where {C <: Optimisers.Restructure, S}
-    f.prob.u0 .+ (t' .- f.prob.tspan[1]) .* f.chain(θ)(adapt(parameterless_type(θ), t'))
-end
-
 function (f::LogTargetDensity{C, S})(t::AbstractVector,
     θ) where {C <: Lux.AbstractExplicitLayer, S}
     θ = vector_to_parameters(θ, f.init_params)
@@ -317,12 +301,6 @@ function (f::LogTargetDensity{C, S})(t::AbstractVector,
     f.prob.u0 .+ (t' .- f.prob.tspan[1]) .* y
 end
 
-function (f::LogTargetDensity{C, S})(t::Number,
-    θ) where {C <: Optimisers.Restructure, S}
-    #  must handle paired odes hence u0 broadcasted
-    f.prob.u0 .+ (t - f.prob.tspan[1]) * f.chain(θ)(adapt(parameterless_type(θ), [t]))
-end
-
 function (f::LogTargetDensity{C, S})(t::Number,
     θ) where {C <: Lux.AbstractExplicitLayer, S}
     θ = vector_to_parameters(θ, f.init_params)
diff --git a/src/bayesian/collocated_estim.jl b/src/collocated_estim.jl
similarity index 100%
rename from src/bayesian/collocated_estim.jl
rename to src/collocated_estim.jl

From e0803b15e5e16b753032d0f9e7908a2da8fec0bb Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Sun, 4 Feb 2024 02:45:03 +0530
Subject: [PATCH 014/107] changes to sync with master

---
 src/advancedHMC_MCMC.jl | 188 +++++++++++++++++++---------------------
 1 file changed, 87 insertions(+), 101 deletions(-)

diff --git a/src/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl
index ada4539b10..e582be4f64 100644
--- a/src/advancedHMC_MCMC.jl
+++ b/src/advancedHMC_MCMC.jl
@@ -68,9 +68,11 @@ mutable struct LogTargetDensity{C, S, ST <: AbstractTrainingStrategy, I,
 end
 
 """
-cool function to convert parameter's vector to ComponentArray of parameters (for Lux Chain: vector of samples -> Lux ComponentArrays)
+Function needed for converting vector of sampled parameters into ComponentVector in case of Lux chain output, derivatives
+the sampled parameters are of exotic type `Dual` due to ForwardDiff's autodiff tagging.
 """
-function vector_to_parameters(ps_new::AbstractVector, ps::NamedTuple)
+function vector_to_parameters(ps_new::AbstractVector,
+        ps::Union{NamedTuple, ComponentArrays.ComponentVector})
     @assert length(ps_new) == Lux.parameterlength(ps)
     i = 1
     function get_ps(x)
@@ -81,6 +83,8 @@ function vector_to_parameters(ps_new::AbstractVector, ps::NamedTuple)
     return Functors.fmap(get_ps, ps)
 end
 
+vector_to_parameters(ps_new::AbstractVector, ps::AbstractVector) = ps_new
+
 function LogDensityProblems.logdensity(Tar::LogTargetDensity, θ)
     if Tar.estim_collocate
         return physloglikelihood(Tar, θ)/length(Tar.dataset[1]) + priorweights(Tar, θ) + L2LossData(Tar, θ)/length(Tar.dataset[1]) + L2loss2(Tar, θ)/length(Tar.dataset[1])
@@ -96,7 +100,7 @@ function LogDensityProblems.capabilities(::LogTargetDensity)
 end
 
 """
-L2 loss loglikelihood(needed for ODE parameter estimation)
+L2 loss loglikelihood(needed for ODE parameter estimation).
 """
 function L2LossData(Tar::LogTargetDensity, θ)
     # check if dataset is provided
@@ -110,9 +114,8 @@ function L2LossData(Tar::LogTargetDensity, θ)
         for i in 1:length(Tar.prob.u0)
             # for u[i] ith vector must be added to dataset,nn[1,:] is the dx in lotka_volterra
             L2logprob += logpdf(MvNormal(nn[i, :],
-                    LinearAlgebra.Diagonal(map(abs2,
-                        Tar.l2std[i] .*
-                        ones(length(Tar.dataset[i]))))),
+                    LinearAlgebra.Diagonal(abs2.(Tar.l2std[i] .*
+                                                 ones(length(Tar.dataset[i]))))),
                 Tar.dataset[i])
         end
         return L2logprob
@@ -120,7 +123,7 @@ function L2LossData(Tar::LogTargetDensity, θ)
 end
 
 """
-physics loglikelihood over problem timespan + dataset timepoints
+Physics loglikelihood over problem timespan + dataset timepoints.
 """
 function physloglikelihood(Tar::LogTargetDensity, θ)
     f = Tar.prob.f
@@ -218,7 +221,7 @@ function getlogpdf(strategy::WeightedIntervalTraining, Tar::LogTargetDensity, f,
 end
 
 """
-MvNormal likelihood at each `ti` in time `t` for ODE collocation residue with NN with parameters θ 
+MvNormal likelihood at each `ti` in time `t` for ODE collocation residue with NN with parameters θ.
 """
 function innerdiff(Tar::LogTargetDensity, f, autodiff::Bool, t::AbstractVector, θ,
     ode_params)
@@ -251,14 +254,13 @@ function innerdiff(Tar::LogTargetDensity, f, autodiff::Bool, t::AbstractVector,
 
     # N dimensional vector if N outputs for NN(each row has logpdf of i[i] where u is vector of dependant variables)
     return [logpdf(MvNormal(vals[i, :],
-            LinearAlgebra.Diagonal(map(abs2,
-                Tar.phystd[i] .*
-                ones(length(vals[i, :]))))),
+            LinearAlgebra.Diagonal(abs2.(Tar.phystd[i] .*
+                                         ones(length(vals[i, :]))))),
         zeros(length(vals[i, :]))) for i in 1:length(Tar.prob.u0)]
 end
 
 """
-prior logpdf for NN parameters + ODE constants
+Prior logpdf for NN parameters + ODE constants.
 """
 function priorweights(Tar::LogTargetDensity, θ)
     allparams = Tar.priors
@@ -291,10 +293,10 @@ function generate_Tar(chain::Lux.AbstractExplicitLayer, init_params::Nothing)
 end
 
 """
-nn OUTPUT AT t,θ ~ phi(t,θ)
+NN OUTPUT AT t,θ ~ phi(t,θ).
 """
 function (f::LogTargetDensity{C, S})(t::AbstractVector,
-    θ) where {C <: Lux.AbstractExplicitLayer, S}
+        θ) where {C <: Lux.AbstractExplicitLayer, S}
     θ = vector_to_parameters(θ, f.init_params)
     y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), t'), θ, f.st)
     ChainRulesCore.@ignore_derivatives f.st = st
@@ -302,7 +304,7 @@ function (f::LogTargetDensity{C, S})(t::AbstractVector,
 end
 
 function (f::LogTargetDensity{C, S})(t::Number,
-    θ) where {C <: Lux.AbstractExplicitLayer, S}
+        θ) where {C <: Lux.AbstractExplicitLayer, S}
     θ = vector_to_parameters(θ, f.init_params)
     y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), [t]), θ, f.st)
     ChainRulesCore.@ignore_derivatives f.st = st
@@ -310,7 +312,7 @@ function (f::LogTargetDensity{C, S})(t::Number,
 end
 
 """
-similar to ode_dfdx() in NNODE/ode_solve.jl
+Similar to ode_dfdx() in NNODE.
 """
 function NNodederi(phi::LogTargetDensity, t::AbstractVector, θ, autodiff::Bool)
     if autodiff
@@ -334,40 +336,19 @@ function kernelchoice(Kernel, MCMCkwargs)
     end
 end
 
-function integratorchoice(Integratorkwargs, initial_ϵ)
-    Integrator = Integratorkwargs[:Integrator]
-    if Integrator == JitteredLeapfrog
-        jitter_rate = Integratorkwargs[:jitter_rate]
-        Integrator(initial_ϵ, jitter_rate)
-    elseif Integrator == TemperedLeapfrog
-        tempering_rate = Integratorkwargs[:tempering_rate]
-        Integrator(initial_ϵ, tempering_rate)
-    else
-        Integrator(initial_ϵ)
-    end
-end
-
-function adaptorchoice(Adaptor, mma, ssa)
-    if Adaptor != AdvancedHMC.NoAdaptation()
-        Adaptor(mma, ssa)
-    else
-        AdvancedHMC.NoAdaptation()
-    end
-end
-
 """
-```julia
-ahmc_bayesian_pinn_ode(prob, chain; strategy = GridTraining,
-                    dataset = [nothing],init_params = nothing, 
-                    draw_samples = 1000, physdt = 1 / 20.0f0,l2std = [0.05],
-                    phystd = [0.05], priorsNNw = (0.0, 2.0),
-                    param = [], nchains = 1, autodiff = false, Kernel = HMC,
-                    Adaptorkwargs = (Adaptor = StanHMCAdaptor,
-                        Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
-                    Integratorkwargs = (Integrator = Leapfrog,),
-                    MCMCkwargs = (n_leapfrog = 30,),
-                    progress = false, verbose = false)
-```
+    ahmc_bayesian_pinn_ode(prob, chain; strategy = GridTraining,
+                        dataset = [nothing],init_params = nothing, 
+                        draw_samples = 1000, physdt = 1 / 20.0f0,l2std = [0.05],
+                        phystd = [0.05], priorsNNw = (0.0, 2.0),
+                        param = [], nchains = 1, autodiff = false, Kernel = HMC,
+                        Adaptorkwargs = (Adaptor = StanHMCAdaptor,
+                                         Metric = DiagEuclideanMetric, 
+                                         targetacceptancerate = 0.8),
+                        Integratorkwargs = (Integrator = Leapfrog,),
+                        MCMCkwargs = (n_leapfrog = 30,),
+                        progress = false, verbose = false)
+
 !!! warn
 
     Note that ahmc_bayesian_pinn_ode() only supports ODEs which are written in the out-of-place form, i.e.
@@ -375,85 +356,82 @@ ahmc_bayesian_pinn_ode(prob, chain; strategy = GridTraining,
     will exit with an error.
 
 ## Example
+
+```julia
 linear = (u, p, t) -> -u / p[1] + exp(t / p[2]) * cos(t)
 tspan = (0.0, 10.0)
 u0 = 0.0
 p = [5.0, -5.0]
 prob = ODEProblem(linear, u0, tspan, p)
 
-# CREATE DATASET (Necessity for accurate Parameter estimation)
+### CREATE DATASET (Necessity for accurate Parameter estimation)
 sol = solve(prob, Tsit5(); saveat = 0.05)
 u = sol.u[1:100]
 time = sol.t[1:100]
 
-# dataset and BPINN create
+### dataset and BPINN create
 x̂ = collect(Float64, Array(u) + 0.05 * randn(size(u)))
 dataset = [x̂, time]
 
-chainflux1 = Flux.Chain(Flux.Dense(1, 5, tanh), Flux.Dense(5, 5, tanh), Flux.Dense(5, 1)
-
-# simply solving ode here hence better to not pass dataset(uses ode params specified in prob)
-fh_mcmc_chainflux1, fhsamplesflux1, fhstatsflux1 = ahmc_bayesian_pinn_ode(prob,chainflux1,
-                                                                          dataset = dataset,
-                                                                          draw_samples = 1500,
-                                                                          l2std = [0.05],
-                                                                          phystd = [0.05],
-                                                                          priorsNNw = (0.0,3.0))
-
-# solving ode + estimating parameters hence dataset needed to optimize parameters upon + Pior Distributions for ODE params
-fh_mcmc_chainflux2, fhsamplesflux2, fhstatsflux2 = ahmc_bayesian_pinn_ode(prob,chainflux1,
-                                                                          dataset = dataset,
-                                                                          draw_samples = 1500,
-                                                                          l2std = [0.05],
-                                                                          phystd = [0.05],
-                                                                          priorsNNw = (0.0,3.0),
-                                                                          param = [Normal(6.5,0.5),Normal(-3,0.5)])
-
-## NOTES 
+chain1 = Lux.Chain(Lux.Dense(1, 5, tanh), Lux.Dense(5, 5, tanh), Lux.Dense(5, 1)
+
+### simply solving ode here hence better to not pass dataset(uses ode params specified in prob)
+fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chain1,
+                                                            dataset = dataset,
+                                                            draw_samples = 1500,
+                                                            l2std = [0.05],
+                                                            phystd = [0.05],
+                                                            priorsNNw = (0.0,3.0))
+
+### solving ode + estimating parameters hence dataset needed to optimize parameters upon + Pior Distributions for ODE params
+fh_mcmc_chain2, fhsamples2, fhstats2 = ahmc_bayesian_pinn_ode(prob, chain1,
+                                                            dataset = dataset,
+                                                            draw_samples = 1500,
+                                                            l2std = [0.05],
+                                                            phystd = [0.05],
+                                                            priorsNNw = (0.0,3.0),
+                                                            param = [Normal(6.5,0.5), Normal(-3,0.5)])
+```
+
+## NOTES
+
 Dataset is required for accurate Parameter estimation + solving equations
 Incase you are only solving the Equations for solution, do not provide dataset
 
 ## Positional Arguments
-* `prob`: DEProblem(out of place and the function signature should be f(u,p,t)
-* `chain`: Lux/Flux Neural Netork which would be made the Bayesian PINN
+
+* `prob`: DEProblem(out of place and the function signature should be f(u,p,t).
+* `chain`: Lux Neural Netork which would be made the Bayesian PINN.
 
 ## Keyword Arguments
+
 * `strategy`: The training strategy used to choose the points for the evaluations. By default GridTraining is used with given physdt discretization.
-* `dataset`: Vector containing Vectors of corresponding u,t values 
 * `init_params`: intial parameter values for BPINN (ideally for multiple chains different initializations preferred)
-* `nchains`: number of chains you want to sample (random initialisation of params by default)
+* `nchains`: number of chains you want to sample
 * `draw_samples`: number of samples to be drawn in the MCMC algorithms (warmup samples are ~2/3 of draw samples)
-* `l2std`: standard deviation of BPINN predicition against L2 losses/Dataset
-* `phystd`: standard deviation of BPINN predicition against Chosen Underlying ODE System
-* `priorsNNw`: Vector of [mean, std] for BPINN parameter. Weights and Biases of BPINN are Normal Distributions by default
+* `l2std`: standard deviation of BPINN prediction against L2 losses/Dataset
+* `phystd`: standard deviation of BPINN prediction against Chosen Underlying ODE System
+* `priorsNNw`: Tuple of (mean, std) for BPINN Network parameters. Weights and Biases of BPINN are Normal Distributions by default.
 * `param`: Vector of chosen ODE parameters Distributions in case of Inverse problems.
 * `autodiff`: Boolean Value for choice of Derivative Backend(default is numerical)
 * `physdt`: Timestep for approximating ODE in it's Time domain. (1/20.0 by default)
-
-# AdvancedHMC.jl is still developing convenience structs so might need changes on new releases.
 * `Kernel`: Choice of MCMC Sampling Algorithm (AdvancedHMC.jl implemenations HMC/NUTS/HMCDA)
-* `Integratorkwargs`: A NamedTuple containing the chosen integrator and its keyword Arguments, as follows :
-    * `Integrator`: https://turinglang.org/AdvancedHMC.jl/stable/
-    * `jitter_rate`: https://turinglang.org/AdvancedHMC.jl/stable/
-    * `tempering_rate`: https://turinglang.org/AdvancedHMC.jl/stable/
-* `Adaptorkwargs`: A NamedTuple containing the chosen Adaptor, it's Metric and targetacceptancerate, as follows :
-    * `Adaptor`: https://turinglang.org/AdvancedHMC.jl/stable/
-    * `Metric`: https://turinglang.org/AdvancedHMC.jl/stable/
-    * `targetacceptancerate`: Target percentage(in decimal) of iterations in which the proposals were accepted(0.8 by default)
+* `Integratorkwargs`: `Integrator`, `jitter_rate`, `tempering_rate`. Refer: https://turinglang.org/AdvancedHMC.jl/stable/
+* `Adaptorkwargs`: `Adaptor`, `Metric`, `targetacceptancerate`. Refer: https://turinglang.org/AdvancedHMC.jl/stable/
+    Note: Target percentage(in decimal) of iterations in which the proposals are accepted (0.8 by default)
 * `MCMCargs`: A NamedTuple containing all the chosen MCMC kernel's(HMC/NUTS/HMCDA) Arguments, as follows :
     * `n_leapfrog`: number of leapfrog steps for HMC
     * `δ`: target acceptance probability for NUTS and HMCDA
     * `λ`: target trajectory length for HMCDA
     * `max_depth`: Maximum doubling tree depth (NUTS)
     * `Δ_max`: Maximum divergence during doubling tree (NUTS)
+    Refer: https://turinglang.org/AdvancedHMC.jl/stable/
 * `progress`: controls whether to show the progress meter or not.
 * `verbose`: controls the verbosity. (Sample call args in AHMC)
 
-"""
+## Warnings
 
-"""
-dataset would be (x̂,t)
-priors: pdf for W,b + pdf for ODE params
+* AdvancedHMC.jl is still developing convenience structs so might need changes on new releases.
 """
 function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain;
     strategy = GridTraining, dataset = [nothing],
@@ -469,6 +447,7 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain;
     progress = false, verbose = false,
     estim_collocate = false)
 
+    !(chain isa Lux.AbstractExplicitLayer) && (chain = Lux.transform(chain))
     # NN parameter prior mean and variance(PriorsNN must be a tuple)
     if isinplace(prob)
         throw(error("The BPINN ODE solver only supports out-of-place ODE definitions, i.e. du=f(u,p,t)."))
@@ -487,11 +466,11 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain;
         throw(error("Dataset Required for Parameter Estimation."))
     end
 
-    if chain isa Lux.AbstractExplicitLayer || chain isa Flux.Chain
+    if chain isa Lux.AbstractExplicitLayer
         # Flux-vector, Lux-Named Tuple
         initial_nnθ, recon, st = generate_Tar(chain, init_params)
     else
-        error("Only Lux.AbstractExplicitLayer and Flux.Chain neural networks are supported")
+        error("Only Lux.AbstractExplicitLayer Neural networks are supported")
     end
 
     if nchains > Threads.nthreads()
@@ -501,13 +480,9 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain;
     end
 
     # eltype(physdt) cause needs Float64 for find_good_stepsize
-    if chain isa Lux.AbstractExplicitLayer
-        # Lux chain(using component array later as vector_to_parameter need namedtuple)
-        initial_θ = collect(eltype(physdt),
-            vcat(ComponentArrays.ComponentArray(initial_nnθ)))
-    else
-        initial_θ = collect(eltype(physdt), initial_nnθ)
-    end
+    # Lux chain(using component array later as vector_to_parameter need namedtuple)
+    initial_θ = collect(eltype(physdt),
+        vcat(ComponentArrays.ComponentArray(initial_nnθ)))
 
     # adding ode parameter estimation
     nparameters = length(initial_θ)
@@ -540,6 +515,10 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain;
         end
     end
 
+    @info("Current Physics Log-likelihood : ", physloglikelihood(ℓπ, initial_θ))
+    @info("Current Prior Log-likelihood : ", priorweights(ℓπ, initial_θ))
+    @info("Current MSE against dataset Log-likelihood : ", L2LossData(ℓπ, initial_θ))
+
     Adaptor, Metric, targetacceptancerate = Adaptorkwargs[:Adaptor],
     Adaptorkwargs[:Metric], Adaptorkwargs[:targetacceptancerate]
 
@@ -585,6 +564,13 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain;
         Kernel = AdvancedHMC.make_kernel(MCMC_alg, integrator)
         samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples,
             adaptor; progress = progress, verbose = verbose, drop_warmup = true)
+
+        @info("Sampling Complete.")
+        @info("Current Physics Log-likelihood : ", physloglikelihood(ℓπ, samples[end]))
+        @info("Current Prior Log-likelihood : ", priorweights(ℓπ, samples[end]))
+        @info("Current MSE against dataset Log-likelihood : ",
+            L2LossData(ℓπ, samples[end]))
+            
         # return a chain(basic chain),samples and stats
         matrix_samples = reshape(hcat(samples...), (length(samples[1]), length(samples), 1)) 
         mcmc_chain = MCMCChains.Chains(matrix_samples)

From 3498ddc8264f5f71841ff86f118f98777b5cb284 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Sun, 4 Feb 2024 02:47:52 +0530
Subject: [PATCH 015/107] keep new dir

---
 src/NeuralPDE.jl                       | 6 +++---
 src/{ => bayesian}/BPINN_ode.jl        | 0
 src/{ => bayesian}/advancedHMC_MCMC.jl | 0
 src/{ => bayesian}/collocated_estim.jl | 0
 4 files changed, 3 insertions(+), 3 deletions(-)
 rename src/{ => bayesian}/BPINN_ode.jl (100%)
 rename src/{ => bayesian}/advancedHMC_MCMC.jl (100%)
 rename src/{ => bayesian}/collocated_estim.jl (100%)

diff --git a/src/NeuralPDE.jl b/src/NeuralPDE.jl
index 57d13d255e..66d6e9c58f 100644
--- a/src/NeuralPDE.jl
+++ b/src/NeuralPDE.jl
@@ -50,10 +50,10 @@ include("dae_solve.jl")
 include("transform_inf_integral.jl")
 include("discretize.jl")
 include("neural_adapter.jl")
-include("advancedHMC_MCMC.jl")
-include("BPINN_ode.jl")
+include("bayesian/advancedHMC_MCMC.jl")
+include("bayesian/BPINN_ode.jl")
 include("PDE_BPINN.jl")
-include("collocated_estim.jl")
+include("bayesian/collocated_estim.jl")
 
 export NNODE, TerminalPDEProblem, NNPDEHan, NNPDENS, NNRODE, NNDAE,
     KolmogorovPDEProblem, NNKolmogorov, NNStopping, ParamKolmogorovPDEProblem,
diff --git a/src/BPINN_ode.jl b/src/bayesian/BPINN_ode.jl
similarity index 100%
rename from src/BPINN_ode.jl
rename to src/bayesian/BPINN_ode.jl
diff --git a/src/advancedHMC_MCMC.jl b/src/bayesian/advancedHMC_MCMC.jl
similarity index 100%
rename from src/advancedHMC_MCMC.jl
rename to src/bayesian/advancedHMC_MCMC.jl
diff --git a/src/collocated_estim.jl b/src/bayesian/collocated_estim.jl
similarity index 100%
rename from src/collocated_estim.jl
rename to src/bayesian/collocated_estim.jl

From 8ba64b2ea633b9fb694e68cc08cb7d056417df62 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Mon, 12 Feb 2024 19:40:25 +0530
Subject: [PATCH 016/107] having problems with eval() call in recursive Dict
 creation

---
 src/PDE_BPINN.jl              | 203 +++++++-------
 src/training_strategies.jl    |  36 ++-
 test/BPINN_PDEinvsol_tests.jl | 502 +++++++++++++++++++++++++++++++++-
 3 files changed, 631 insertions(+), 110 deletions(-)

diff --git a/src/PDE_BPINN.jl b/src/PDE_BPINN.jl
index b01df40a9f..96950961ff 100644
--- a/src/PDE_BPINN.jl
+++ b/src/PDE_BPINN.jl
@@ -63,28 +63,86 @@ mutable struct PDELogTargetDensity{
     end
 end
 
+# for bc case, [bc]/bc eqs must be passed along with dataset_bc[i]
+# and final loss for bc must be together in a vector(bcs has seperate type of dataset_bc)
+# eqs is vector of pde eqs and dataset here is dataset_pde
+# normally you get vector of losses
+function get_lossy(pinnrep, dataset, eqs)
+    depvars = pinnrep.depvars # order is same as dataset and interps
+    dict_depvar_input = pinnrep.dict_depvar_input
+
+    Dict_differentials0 = Dict()
+    exp = toexpr(eqs)
+    Symbolics.variable.(hcat(pinnrep.indvars, pinnrep.depvars))
+    for exp_i in exp
+        recur_expression(exp_i, Dict_differentials0)
+    end
+    # Dict_differentials is now filled with Differential operator => diff_i key-value pairs
+
+    Dict_differentials = Dict()
+    for (a, b) in Dict_differentials0
+        # println(eval(a.args[1]))
+        # Symbolics.operation(Symbolics.value(z))
+        println(a)
+        a = Symbolics.parse_expr_to_symbolic(a, NeuralPDE)
+        Dict_differentials[a] = b
+    end
+
+    # masking operation
+    println("Dict_differentials : ", Dict_differentials)
+    a = substitute.(eqs, Ref(Dict_differentials))
+    println("Masked Differential term : ", a)
+
+    to_subs, tobe_subs = get_symbols(dataset, depvars, eqs)
+    # for each row in dataset create u values for substituing in equation, n_equations=n_rows
+    eq_subs = [Dict(tobe_subs[depvar] => to_subs[depvar][i] for depvar in depvars)
+               for i in 1:size(dataset[1][:, 1])[1]]
+
+    b = []
+    for eq_sub in eq_subs
+        push!(b, [substitute(a_i, eq_sub) for a_i in a])
+    end
+
+    # reverse dict for re-substituing values of Differential(t)(u(t)) etc
+    rev_Dict_differentials = Dict(value => key for (key, value) in Dict_differentials)
+
+    c = []
+    for b_i in b
+        push!(c, substitute.(b_i, Ref(rev_Dict_differentials)))
+    end
+    println("After re Substituting depvars : ", c[1])
+    # c = hcat(c...)
+
+    # get losses
+    loss_functions = [[build_loss_function(pinnrep, eq, pde_indvar)
+                       for (eq, pde_indvar, integration_indvar) in zip(c[i],
+        pinnrep.pde_indvars,
+        pinnrep.pde_integration_vars)] for i in eachindex(c)]
+
+    return loss_functions
+end
+
 # dataset_pde has normal matrix format 
 # dataset_bc has format of Vector{typeof(dataset_pde )} as each bc has different domain requirements
-function get_symbols(dict_depvar_input, dataset, depvars)
-    # get datasets into splattable form
-    splat_form = [[dataset_i[:, i] for i in 1:size(dataset_i)[2]] for dataset_i in dataset]
-    # splat datasets onto Linear interpolations tables
-    interps = [LinearInterpolation(splat_i...) for splat_i in splat_form]
-    interps = Dict(depvars .=> interps)
+function get_symbols(dataset, depvars, eqs)
+    depvar_vals = [dataset_i[:, 1] for dataset_i in dataset]
+    # order of depvars
+    to_subs = Dict(depvars .=> depvar_vals)
 
-    Dict_symbol_interps = Dict(depvar => (interps[depvar], dict_depvar_input[depvar])
-                               for depvar in depvars)
+    asrt = Symbolics.get_variables.(eqs)
+    # want only symbols of depvars
+    temp = unique(reduce(vcat, asrt))
 
     tobe_subs = Dict()
-    for (a, b) in dict_depvar_input
-        tobe_subs[a] = eval(:($a($(b...))))
+    for a in depvars
+        for i in temp
+            expr = toexpr(i)
+            if (expr isa Expr) && (expr.args[1] == a)
+                tobe_subs[a] = i
+            end
+        end
     end
 
-    to_subs = Dict()
-    for (a, b) in Dict_symbol_interps
-        b1, b2 = b
-        to_subs[a] = eval(:($b1($(b2...))))
-    end
     return to_subs, tobe_subs
 end
 
@@ -98,7 +156,9 @@ function recur_expression(exp, Dict_differentials)
             # first symbol of differential term
             # Dict_differentials for masking differential terms
             # and resubstituting differentials in equations after putting in interpolations
-            Dict_differentials[eval(in_exp)] = Symbol("diff_$(length(Dict_differentials)+1)")
+            println("starting")
+            Dict_differentials[in_exp] = Symbolics.variable("diff_$(length(Dict_differentials)+1)")
+            println("ending")
             return
 
         else
@@ -107,71 +167,11 @@ function recur_expression(exp, Dict_differentials)
     end
 end
 
-# get datafree loss functions for new loss type
-# need to call merge_strategy_with_loss_function() variant after this
-function merge_dataset_with_loss_function(pinnrep::NeuralPDE.PINNRepresentation,
-        dataset,
-        datafree_pde_loss_function,
-        datafree_bc_loss_function)
-    @unpack domains, eqs, bcs, dict_indvars, dict_depvars, flat_init_params = pinnrep
-
-    eltypeθ = eltype(pinnrep.flat_init_params)
-
-    train_sets = [[dataset[i][:, 2] for i in eachindex(dataset)], [[0;;], [0;;], [0;;]]]
-
-    # the points in the domain and on the boundary
-    pde_train_sets, bcs_train_sets = train_sets
-    pde_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)),
-        pde_train_sets)
-    bcs_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)),
-        bcs_train_sets)
-    pde_loss_functions = [get_loss_function(_loss, _set, eltypeθ)
-                          for (_loss, _set) in zip(datafree_pde_loss_function,
-        pde_train_sets)]
-
-    bc_loss_functions = [get_loss_function(_loss, _set, eltypeθ)
-                         for (_loss, _set) in zip(datafree_bc_loss_function, bcs_train_sets)]
-
-    pde_loss_functions, bc_loss_functions
-end
-
-function get_loss_function(loss_function, train_set, eltypeθ; τ = nothing)
-    loss = (θ) -> mean(abs2, loss_function(train_set, θ))
-end
-
-# for bc case, [bc]/bc eqs must be passed along with dataset_bc[i]
-# and final loss for bc must be together in a vector(bcs has seperate type of dataset_bc)
-# eqs is vector of pde eqs and dataset here is dataset_pde
-# normally you get vector of losses
-function get_loss_2(pinnrep, dataset, eqs)
-    depvars = pinnrep.depvars # order is same as dataset and interps
-    dict_depvar_input = pinnrep.dict_depvar_input
-
-    to_subs, tobe_subs = get_symbols(dict_depvar_input, dataset, depvars)
-    interp_subs_dict = Dict(tobe_subs[depvar] => to_subs[depvar] for depvar in depvars)
-
-    Dict_differentials = Dict()
-    exp = toexpr(eqs)
-    void_value = [recur_expression(exp_i, Dict_differentials) for exp_i in exp]
-    # Dict_differentials is now filled with Differential operator => diff_i key-value pairs
-
-    # masking operation
-    a = substitute.(eqs, Ref(Dict_differentials))
-    b = substitute.(a, Ref(interp_subs_dict))
-    # reverse dict for re-substituing values of Differential(t)(u(t)) etc
-    rev_Dict_differentials = Dict(value => key for (key, value) in Dict_differentials)
-    eqs = substitute.(b, Ref(rev_Dict_differentials))
-    # get losses
-    loss_functions = [NeuralPDE.build_loss_function(pinnrep,
-        eqs[i],
-        pinnrep.pde_indvars[i]) for i in eachindex(eqs)]
-end
-
 function LogDensityProblems.logdensity(Tar::PDELogTargetDensity, θ)
     # for parameter estimation neccesarry to use multioutput case
     return Tar.full_loglikelihood(setparameters(Tar, θ),
-               Tar.allstd) + priorlogpdf(Tar, θ) + Tar.L2_loss2(setparameters(Tar, θ),
-               Tar.allstd)
+               Tar.allstd) + priorlogpdf(Tar, θ) + L2LossData(Tar, θ) +
+           Tar.L2_loss2(setparameters(Tar, θ), Tar.allstd)
 end
 
 function setparameters(Tar::PDELogTargetDensity, θ)
@@ -218,7 +218,7 @@ function L2LossData(Tar::PDELogTargetDensity, θ)
     # dataset of form Vector[matrix_x, matrix_y, matrix_z]
     # matrix_i is of form [i,indvar1,indvar2,..] (needed in case if heterogenous domains)
     # note that indvar1,indvar2.. cols can be different values for different depvar matrices
-    # order follows pinnrep.depvars orders of variables (order of declaration in @variables macro)
+    # dataset,phi order follows pinnrep.depvars orders of variables (order of declaration in @variables macro)
 
     # Phi is the trial solution for each NN in chain array
     # Creating logpdf( MvNormal(Phi(t,θ),std), dataset[i] )
@@ -255,7 +255,7 @@ function priorlogpdf(Tar::PDELogTargetDensity, θ)
 
         return (invlogpdf
                 +
-                logpdf(nnwparams, θ[1:(length(θ) - Tar.extraparams)])) 
+                logpdf(nnwparams, θ[1:(length(θ) - Tar.extraparams)]))
     end
     return logpdf(nnwparams, θ)
 end
@@ -403,28 +403,30 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
     dataset_pde, dataset_bc = discretization.dataset
 
     eqs = pinnrep.eqs
-    yuh1 = get_loss_2(pinnrep, dataset_pde, eqs)
-    eqs = pinnrep.bcs
-    yuh2 = get_loss_2(pinnrep, dataset_bc, eqs)
+    yuh1 = get_lossy(pinnrep, dataset_pde, eqs)
+    # eqs = pinnrep.bcs
+    # yuh2 = get_lossy(pinnrep, dataset_pde, eqs)
 
-    pde_loss_functions, bc_loss_functions = merge_dataset_with_loss_function(pinnrep,
-        dataset,
-        yuh1,
-        yuh2)
+    pde_loss_functions = [merge_strategy_with_loglikelihood_function(pinnrep::PINNRepresentation,
+        GridTraining(0.1),
+        yuh1[i],
+        nothing; train_sets_pde = [data_pde[i, :] for data_pde in dataset_pde],
+        train_sets_bc = nothing)[1]
+                          for i in eachindex(yuh1)]
 
     function L2_loss2(θ, allstd)
         stdpdes, stdbcs, stdextra = allstd
-        pde_loglikelihoods = [logpdf(Normal(0, stdpdes[i]), pde_loss_function(θ))
-                              for (i, pde_loss_function) in enumerate(pde_loss_functions)]
-
-        bc_loglikelihoods = [logpdf(Normal(0, stdbcs[j]), bc_loss_function(θ))
-                             for (j, bc_loss_function) in enumerate(bc_loss_functions)]
-        println("pde_loglikelihoods : ", pde_loglikelihoods)
-        println("bc_loglikelihoods : ", bc_loglikelihoods)
-        return sum(sum(pde_loglikelihoods) + sum(bc_loglikelihoods))
+        pde_loglikelihoods = [[logpdf(Normal(0, 0.8 * stdpdes[i]), pde_loss_function(θ))
+                               for (i, pde_loss_function) in enumerate(pde_loss_functions[i])]
+                              for i in eachindex(pde_loss_functions)]
+
+        # bc_loglikelihoods = [logpdf(Normal(0, stdbcs[j]), bc_loss_function(θ))
+        #                      for (j, bc_loss_function) in enumerate(bc_loss_functions)]
+        # println("bc_loglikelihoods : ", bc_loglikelihoods)
+        return sum(sum(pde_loglikelihoods))
+        # sum(sum(pde_loglikelihoods) + sum(bc_loglikelihoods))
     end
 
-    println(L2_loss2)
     # WIP split dataset to respective equations
     if ((dataset_bc isa Nothing) && (dataset_pde isa Nothing))
         dataset = nothing
@@ -501,7 +503,8 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
         names,
         ninv,
         initial_nnθ,
-        full_weighted_loglikelihood, L2_loss2,
+        full_weighted_loglikelihood,
+        L2_loss2,
         Φ)
 
     Adaptor, Metric, targetacceptancerate = Adaptorkwargs[:Adaptor],
@@ -516,6 +519,9 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
             ℓπ.allstd))
     @info("Current Prior Log-likelihood : ", priorlogpdf(ℓπ, initial_θ))
     @info("Current MSE against dataset Log-likelihood : ", L2LossData(ℓπ, initial_θ))
+    @info("Current L2_LOSSY : ",
+        ℓπ.L2_loss2(setparameters(ℓπ, initial_θ),
+            ℓπ.allstd))
 
     # parallel sampling option
     if nchains != 1
@@ -574,6 +580,9 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
         @info("Current Prior Log-likelihood : ", priorlogpdf(ℓπ, samples[end]))
         @info("Current MSE against dataset Log-likelihood : ",
             L2LossData(ℓπ, samples[end]))
+        @info("Current L2_LOSSY : ",
+            ℓπ.L2_loss2(setparameters(ℓπ, samples[end]),
+                ℓπ.allstd))
 
         fullsolution = BPINNstats(mcmc_chain, samples, stats)
         ensemblecurves, estimnnparams, estimated_params, timepoints = inference(samples,
diff --git a/src/training_strategies.jl b/src/training_strategies.jl
index a419afcdbf..d3e134e8a2 100644
--- a/src/training_strategies.jl
+++ b/src/training_strategies.jl
@@ -14,6 +14,27 @@ struct GridTraining{T} <: AbstractTrainingStrategy
     dx::T
 end
 
+function get_dataset_train_points(eqs, train_sets, pinnrep)
+    dict_depvar_input = pinnrep.dict_depvar_input
+    depvars = pinnrep.depvars
+    dict_depvars = pinnrep.dict_depvars
+    dict_indvars = pinnrep.dict_indvars
+
+    symbols_input = [(i, dict_depvar_input[i]) for i in depvars]
+    eq_args = NeuralPDE.get_argument(eqs, dict_indvars, dict_depvars)
+    points = []
+    for eq_arg in eq_args
+        a = []
+        for i in eachindex(symbols_input)
+            if symbols_input[i][2] == eq_arg
+                push!(a, train_sets[i][:, 2:end]')
+            end
+        end
+        push!(points, vcat(a...))
+    end
+    return points
+end
+
 # include dataset points in pde_residual loglikelihood (BayesianPINN)
 function merge_strategy_with_loglikelihood_function(pinnrep::PINNRepresentation,
         strategy::GridTraining,
@@ -25,7 +46,10 @@ function merge_strategy_with_loglikelihood_function(pinnrep::PINNRepresentation,
 
     # is vec as later each _set in pde_train_sets are coloumns as points transformed to vector of points (pde_train_sets must be rowwise)
     pde_loss_functions = if !(train_sets_pde isa Nothing)
-        pde_train_sets = [train_set[:, 2:end] for train_set in train_sets_pde]
+
+        pde_train_sets = get_dataset_train_points(eqs, train_sets_pde, pinnrep)
+        println(" pde train set : ", pde_train_sets)
+        println("type pde  train set : ", size(pde_train_sets))
         pde_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)),
             pde_train_sets)
         [get_loss_function(_loss, _set, eltypeθ, strategy)
@@ -36,7 +60,10 @@ function merge_strategy_with_loglikelihood_function(pinnrep::PINNRepresentation,
     end
     
     bc_loss_functions = if !(train_sets_bc isa Nothing)
-        bcs_train_sets = [train_set[:, 2:end] for train_set in train_sets_bc]
+
+        bcs_train_sets = get_dataset_train_points(bcs, train_sets_bc, pinnrep)
+        println("bcs train set : ", bcs_train_sets)
+        println("type bcs train set : ", size(bcs_train_sets))
         bcs_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)),
         bcs_train_sets)
         [get_loss_function(_loss, _set, eltypeθ, strategy)
@@ -71,7 +98,10 @@ function merge_strategy_with_loss_function(pinnrep::PINNRepresentation,
 
     bc_loss_functions = [get_loss_function(_loss, _set, eltypeθ, strategy)
                          for (_loss, _set) in zip(datafree_bc_loss_function, bcs_train_sets)]
-
+                            println("pde_train_sets : ",pde_train_sets)
+                            println("pde_train_sets : ",size(pde_train_sets))
+                            println("bc_train_sets : ",bcs_train_sets)
+                            println("bc_train_sets : ",size(bcs_train_sets))
     pde_loss_functions, bc_loss_functions
 end
 
diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index b340865bed..870a6aac53 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -35,8 +35,8 @@ Random.seed!(100)
     dataset = [hcat(u1, timepoints)]
 
     # checking all training strategies
-    discretization = BayesianPINN([chainl], StochasticTraining(200), param_estim = true, 
-                                dataset = [dataset, nothing])
+    discretization = BayesianPINN([chainl], StochasticTraining(200), param_estim = true,
+        dataset = [dataset, nothing])
 
     ahmc_bayesian_pinn_pde(pde_system,
         discretization;
@@ -47,8 +47,8 @@ Random.seed!(100)
         saveats = [1 / 50.0],
         param = [LogNormal(6.0, 0.5)])
 
-    discretization = BayesianPINN([chainl], QuasiRandomTraining(200), param_estim = true, 
-                                dataset = [dataset, nothing])
+    discretization = BayesianPINN([chainl], QuasiRandomTraining(200), param_estim = true,
+        dataset = [dataset, nothing])
 
     ahmc_bayesian_pinn_pde(pde_system,
         discretization;
@@ -59,8 +59,8 @@ Random.seed!(100)
         saveats = [1 / 50.0],
         param = [LogNormal(6.0, 0.5)])
 
-    discretization = BayesianPINN([chainl], QuadratureTraining(), param_estim = true, 
-                                dataset = [dataset, nothing])
+    discretization = BayesianPINN([chainl], QuadratureTraining(), param_estim = true,
+        dataset = [dataset, nothing])
 
     ahmc_bayesian_pinn_pde(pde_system,
         discretization;
@@ -71,8 +71,8 @@ Random.seed!(100)
         saveats = [1 / 50.0],
         param = [LogNormal(6.0, 0.5)])
 
-    discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true, 
-                                dataset = [dataset, nothing])
+    discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
+        dataset = [dataset, nothing])
 
     sol1 = ahmc_bayesian_pinn_pde(pde_system,
         discretization;
@@ -132,8 +132,8 @@ end
     ts_ = hcat(sol(ts).t...)[1, :]
     dataset = [hcat(us[i, :], ts_) for i in 1:3]
 
-    discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true, 
-                                dataset = [dataset, nothing])
+    discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true,
+        dataset = [dataset, nothing])
 
     @named pde_system = PDESystem(eqs, bcs, domains,
         [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]]))
@@ -153,3 +153,485 @@ end
     @test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
     # @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
 end
+
+@parameters t, x, p
+@variables u(..)
+Dt = Differential(t)
+Dx = Differential(x)
+eqs = [u(t, x) * Dt(u(t, x)) - cos(p * t) ~ 0, u(t, x) + Dx(u(t, x)) ~ 0.0]
+bcs = [u(0, x) ~ 0.0, u(t, 10) ~ 1.0]
+domains = [t ∈ Interval(0.0, 2.0), x ∈ Interval(0.0, 2.0)]
+
+chainl = Lux.Chain(Lux.Dense(2, 6, tanh), Lux.Dense(6, 1))
+initl, st = Lux.setup(Random.default_rng(), chainl)
+
+@named pde_system = PDESystem(eqs,
+    bcs,
+    domains,
+    [t, x],
+    [u(t, x)],
+    [p],
+    defaults = Dict([p => 4.0]))
+
+analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
+timepoints = collect(0.0:(1 / 100.0):2.0)
+u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
+u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
+dataset = [hcat(u1, u1, timepoints)]
+
+# checking all training strategies
+# discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
+#     dataset = [dataset, nothing])
+
+discretization = BayesianPINN([chainl],
+    GridTraining([0.2, 0.2]),
+    param_estim = true, dataset = [dataset, nothing])
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 1500,
+    bcstd = [0.05, 0.05],
+    phystd = [0.01, 0.01], l2std = [0.01],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 50.0, 1 / 20.0],
+    param = [Normal(3.0, 0.5)], progress = true)
+
+param = 2 * π
+ts = vec(sol1.timepoints[1])
+u_real = [analytic_sol_func1(0.0, t) for t in ts]
+u_predict = pmean(sol1.ensemblesol[1])
+
+@test u_predict≈u_real atol=1.5
+@test mean(u_predict .- u_real) < 0.1
+@test sol1.estimated_de_params[1]≈param atol=param * 0.3
+
+# function get_symbols(dict_depvar_input, dataset, depvars, eqs)
+
+#     # get datasets into splattable form
+#     splat_form = [[dataset_i[:, i] for i in 1:size(dataset_i)[2]] for dataset_i in dataset]
+#     # splat datasets onto Linear interpolations tables
+#     interps = [LinearInterpolation(splat_i...) for splat_i in splat_form]
+#     # this works as order of dataset matches order of depvars
+#     interps = Dict(depvars .=> interps)
+
+#     Dict_symbol_interps = Dict(depvar => (interps[depvar], dict_depvar_input[depvar])
+#                                for depvar in depvars)
+
+#     tobe_subs = Dict()
+
+#     asrt = Symbolics.get_variables.(eqs)
+#     # want only symbols of depvars
+#     tempo = unique(reduce(vcat, asrt))[(end - length(depvars) + 1):end]
+#     # now we have all the depvars, we now need all depvars whcih can be substituted with data interps
+
+#     tobe_subs = Dict()
+#     for a in depvars
+#         for i in tempo
+#             if toexpr(i).args[1] == a
+#                 tobe_subs[a] = i
+#             end
+#         end
+#     end
+
+#     # do the same thing as above here using pinnrep.indvars
+#     to_subs = Dict()
+#     for (a, b) in Dict_symbol_interps
+#         b1, b2 = b
+#         for i in tempo
+#             if toexpr(i).args[1] == a
+#                 tobe_subs[a] = i
+#             end
+#         end
+#     end
+#     for (a, b) in Dict_symbol_interps
+#         b1, b2 = b
+#         to_subs[a] = eval(:($b1($(b2...))))
+#         # Symbol("$b1($(b2...))")
+#         # eval(:($b1($(b2...))))
+#     end
+
+#     println("to_subs : ", to_subs)
+#     println("tobe_subs : ", tobe_subs)
+#     return to_subs, tobe_subs
+# end
+
+# function recur_expression(exp, Dict_differentials)
+#     for in_exp in exp.args
+#         if !(in_exp isa Expr)
+#             # skip +,== symbols, characters etc
+#             continue
+
+#         elseif in_exp.args[1] isa ModelingToolkit.Differential
+#             # first symbol of differential term
+#             # Dict_differentials for masking differential terms
+#             # and resubstituting differentials in equations after putting in interpolations
+#             temp = eval(in_exp)
+#             # println(" inside recursion : ")
+#             # println("in_exp went from ", in_exp, " to ", temp)
+#             # println("typeof in_exp went from ", typeof(in_exp), " to ", typeof(temp))
+#             Dict_differentials[temp] = Symbol("diff_$(length(Dict_differentials)+1)")
+#             return
+
+#         else
+#             recur_expression(in_exp, Dict_differentials)
+#         end
+#     end
+# end
+
+# get datafree loss functions for new loss type
+# need to call merge_strategy_with_loss_function() variant after this
+function merge_dataset_with_loss_function(pinnrep::NeuralPDE.PINNRepresentation,
+        dataset,
+        datafree_pde_loss_function,
+        datafree_bc_loss_function)
+    @unpack domains, eqs, bcs, dict_indvars, dict_depvars, flat_init_params = pinnrep
+
+    eltypeθ = eltype(pinnrep.flat_init_params)
+
+    train_sets = [[dataset[i][:, 2] for i in eachindex(dataset)], [[0;;], [0;;], [0;;]]]
+
+    # the points in the domain and on the boundary
+    pde_train_sets, bcs_train_sets = train_sets
+    # pde_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)),
+    #     pde_train_sets)
+    # bcs_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)),
+    #     bcs_train_sets)
+    pde_loss_functions = [get_loss_function(_loss, _set, eltypeθ)
+                          for (_loss, _set) in zip(datafree_pde_loss_function,
+        pde_train_sets)]
+
+    bc_loss_functions = [get_loss_function(_loss, _set, eltypeθ)
+                         for (_loss, _set) in zip(datafree_bc_loss_function, bcs_train_sets)]
+
+    pde_loss_functions, bc_loss_functions
+end
+
+function get_loss_function(loss_function, train_set, eltypeθ; τ = nothing)
+    loss = (θ) -> mean(abs2, loss_function(train_set, θ))
+end
+
+# for bc case, [bc]/bc eqs must be passed along with dataset_bc[i]
+# and final loss for bc must be together in a vector(bcs has seperate type of dataset_bc)
+# eqs is vector of pde eqs and dataset here is dataset_pde
+# normally you get vector of losses
+function get_loss_2(pinnrep, dataset, eqs)
+    depvars = pinnrep.depvars # order is same as dataset and interps
+    dict_depvar_input = pinnrep.dict_depvar_input
+
+    to_subs, tobe_subs = get_symbols(dict_depvar_input, dataset, depvars)
+    interp_subs_dict = Dict(tobe_subs[depvar] => to_subs[depvar] for depvar in depvars)
+
+    Dict_differentials = Dict()
+    exp = toexpr(eqs)
+    void_value = [recur_expression(exp_i, Dict_differentials) for exp_i in exp]
+    # Dict_differentials is now filled with Differential operator => diff_i key-value pairs
+
+    # masking operation
+    a = substitute.(eqs, Ref(Dict_differentials))
+    println(a)
+    b = substitute.(a, Ref(interp_subs_dict))
+    println(b)
+    # reverse dict for re-substituing values of Differential(t)(u(t)) etc
+    rev_Dict_differentials = Dict(value => key for (key, value) in Dict_differentials)
+    eqs = substitute.(b, Ref(rev_Dict_differentials))
+    # get losses
+    loss_functions = [NeuralPDE.build_loss_function(pinnrep,
+        eqs[i],
+        pinnrep.pde_indvars[i]) for i in eachindex(eqs)]
+end
+
+# -----------------===============
+eqs
+a = substitute.(eqs, Ref(Dict(t => 1)))
+
+# after masking
+# this can remove interpolations need
+b = substitute.(eqs, Ref(Dict(u(t) => interp([1]...))))
+
+toexpr(a[1]).args[2].args[2].args[2](3)
+Symbol("$(u)")
+
+interp = LinearInterpolation([1, 2], [1, 23])
+typeof(interp)
+LinearInterpolation{Vector{Int64}, Vector{Int64}, true, Int64}
+
+typeof(interp(t))
+SymbolicUtils.BasicSymbolic{Real}
+interp_vars = [t]
+interp(interp_vars...)
+arg = pinnrep.dict_depvar_input[:u]
+arg = [g, l]
+pinnrep.indvars
+@parameters (arg...)
+eval(:($interp($(arg...))))
+b = substitute(a, Dict(t => 1))
+@parameters aa[1:2]
+aa = [m, l]
+l
+m
+
+# >why not mask differential
+function get_lossy(pinnrep, dataset, eqs)
+    depvars = pinnrep.depvars # order is same as dataset and interps
+    dict_depvar_input = pinnrep.dict_depvar_input
+
+    Dict_differentials = Dict()
+    exp = toexpr(eqs)
+    for exp_i in exp
+        recur_expression(exp_i, Dict_differentials)
+    end
+    # Dict_differentials is now filled with Differential operator => diff_i key-value pairs
+
+    # masking operation
+    println("Dict_differentials : ", Dict_differentials)
+    a = substitute.(eqs, Ref(Dict_differentials))
+    println("Masked Differential term : ", a)
+
+    to_subs, tobe_subs = get_symbols(dict_depvar_input, dataset, depvars, eqs)
+    # for each row in dataset create u values for substituing in equation, n_equations=n_rows
+    eq_subs = [Dict(tobe_subs[depvar] => to_subs[depvar][i] for depvar in depvars)
+               for i in 1:size(dataset[1][:, 1])[1]]
+
+    b = []
+    for eq_sub in eq_subs
+        push!(b, [substitute(a_i, eq_sub) for a_i in a])
+    end
+
+    # reverse dict for re-substituing values of Differential(t)(u(t)) etc
+    rev_Dict_differentials = Dict(value => key for (key, value) in Dict_differentials)
+
+    c = []
+    for b_i in b
+        push!(c, substitute.(b_i, Ref(rev_Dict_differentials)))
+    end
+    println("After re Substituting depvars : ", c[1])
+    # c = vcat(c...)
+    println(c)
+    c
+    # get losses
+    # loss_functions = [NeuralPDE.build_loss_function(pinnrep,
+    #     c[i, :][j],
+    #     pinnrep.pde_indvars[j]) for j in eachindex(pinnrep.pde_indvars)]
+    # return loss_functions
+end
+
+# finally dataset to be fed
+# train sets format [[],[]]
+pinnrep.pde_indvars
+pinnrep = NeuralPDE.symbolic_discretize(pde_system, discretization)
+eqs = pinnrep.eqs
+yuh1 = get_lossy(pinnrep, dataset, eqs)
+pde_loss_functions = [NeuralPDE.merge_strategy_with_loglikelihood_function(pinnrep,
+    GridTraining(0.1),
+    yuh1[i],
+    nothing; train_sets_pde = [data_pde[i, :] for data_pde in dataset],
+    train_sets_bc = nothing)[1]
+                      for i in eachindex(yuh1)]
+function L2_loss2(θ, allstd)
+    stdpdes, stdbcs, stdextra = allstd
+    pde_loglikelihoods = [[logpdf(Normal(0, 0.8 * stdpdes[i]), pde_loss_function(θ))
+                           for (i, pde_loss_function) in enumerate(pde_loss_functions[i])]
+                          for i in eachindex(pde_loss_functions)]
+
+    # bc_loglikelihoods = [logpdf(Normal(0, stdbcs[j]), bc_loss_function(θ))
+    #                      for (j, bc_loss_function) in enumerate(bc_loss_functions)]
+    # println("bc_loglikelihoods : ", bc_loglikelihoods)
+    return sum(sum(pde_loglikelihoods))
+    # sum(sum(pde_loglikelihoods) + sum(bc_loglikelihoods))
+end
+
+L2_loss2([1, 2, 3, 4], [1, 1, 1])
+
+[NeuralPDE.parse_equation(pinnrep, exa) for exa in exam]
+a = "diff_1"
+substitute(a * u(t, x) - cos(p * t) ~ 0, Dict(u(t, x) => 1.0))
+substitute(eqs[1], Dict(u(t, x) => 1.0))
+# dataset_pde has normal matrix format 
+# dataset_bc has format of Vector{typeof(dataset_pde )} as each bc has different domain requirements
+function get_symbols(dict_depvar_input, dataset, depvars, eqs)
+    depvar_vals = [dataset_i[:, 1] for dataset_i in dataset]
+    # order of depvars
+    to_subs = Dict(pinnrep.depvars .=> depvar_vals)
+
+    asrt = Symbolics.get_variables.(eqs)
+    # want only symbols of depvars
+    temp = unique(reduce(vcat, asrt))
+    # now we have all the depvars, we now need all depvars whcih can be substituted with data interps
+
+    tobe_subs = Dict()
+    for a in depvars
+        for i in temp
+            expr = toexpr(i)
+            if (expr isa Expr) && (expr.args[1] == a)
+                tobe_subs[a] = i
+            end
+        end
+    end
+
+    return to_subs, tobe_subs
+end
+
+yuh = get_symbols(pinnrep.dict_depvar_input, dataset, pinnrep.depvars, pinnrep.eqs)
+
+function recur_expression(exp, Dict_differentials)
+    for in_exp in exp.args
+        if !(in_exp isa Expr)
+            # skip +,== symbols, characters etc
+            continue
+
+        elseif in_exp.args[1] isa ModelingToolkit.Differential
+            # first symbol of differential term
+            # Dict_differentials for masking differential terms
+            # and resubstituting differentials in equations after putting in interpolations
+            # temp = in_exp.args[end]
+            # in_exp.args[end] = Symbolics.variable(in_exp.args[end])
+
+            Dict_differentials[in_exp] = Symbolics.variable("diff_$(length(Dict_differentials)+1)")
+            return
+        else
+            recur_expression(in_exp, Dict_differentials)
+        end
+    end
+end
+vars = Symbolics.variable.(hcat(pinnrep.indvars, pinnrep.depvars))
+toexpr(Differential(t)(Differential(u)(u(t))) + u(t) ~ 0).args[2]
+eqs
+# Differential(t)(u(t)) - cos(p * t) ~ 0
+exprs = toexpr(eqs)
+pop = Dict()
+recur_expression(exprs, pop)
+pop1 = Dict()
+for (a, b) in pop
+    pop1[eval(a)] = b
+end
+pop1
+a = substitute(eqs, pop1)
+
+transpose(dataset[1])
+pde_system.eqs
+pde_system.bcs
+eqs = pde_system.eqs
+Symbolics.get_variables(eqs[1])
+# eqs=a
+
+NeuralPDE.get_variables(pinnrep.eqs, pinnrep.dict_indvars, pinnrep.dict_depvars)
+NeuralPDE.get_argument(pinnrep.bcs, pinnrep.dict_indvars, pinnrep.dict_depvars)
+dx = pinnrep.strategy.dx
+eltypeθ = eltype(pinnrep.flat_init_params)
+
+# solve dataset physics loss for heterogenous case
+# create number of equations as number of interpolation and points(n rows)
+# follow masking and finally feed training sets as set in interpolations input of u(t,x,..)
+
+# logic for recursion formula to parse differentials
+# # this below has the whole differential term
+toexpr(pde_system.eqs[1]).args[2].args[3].args[3]
+# toexpr(pde_system.eqs[1]).args[2].args[3].args[3]
+# # .args[1] isa ModelingToolkit.Differential
+
+# logic for interpolation and indvars splatting to get Equation parsing terms
+# splat_form = [[dataset_i[:, i] for i in 1:size(dataset_i)[2]] for dataset_i in dataset]
+# # splat datasets onto Linear interpolations tables
+# interps = [LinearInterpolation(splat_i...) for splat_i in splat_form]
+# interps = Dict(depvars .=> interps)
+# get datasets into splattable form
+# splat_form = [[dataset_i[:, i] for i in 1:size(dataset_i)[2]] for dataset_i in dataset]
+# # splat datasets onto Linear interpolations tables
+# yu = [LinearInterpolation(splat_i...) for splat_i in splat_form]
+# Symbol(:($(yu[1])))
+
+# logic to contrauct dict to feed for masking
+# Dict(interps[depvar] => dict_depvar_input[depvar] for depvar in depvars)
+
+# what do i want?
+# > what do i have?
+# i have a dataset of depvars and corresponding indvars values
+# i want for each equation indvars - get_variables()
+# construct physics losses based on above list and dataset values
+# dataset - dict_depvars_input construct
+# use this on dataset
+
+# from pinnrep and dataset gives eqaution wise datasets
+symbols_input = [(i, pinnrep.dict_depvar_input[i]) for i in pinnrep.depvars]
+eq_args = NeuralPDE.get_argument(eqs, pinnrep.dict_indvars, pinnrep.dict_depvars)
+points = []
+for eq_arg in eq_args
+    a = []
+    for i in eachindex(symbols_input)
+        if symbols_input[i][2] == eq_arg
+            push!(a, dataset[i][2:end])
+        end
+    end
+    push!(points, a)
+end
+typeof(points[1])
+
+d = Dict()
+dataset[1][:, 2:end]'
+Dict(symbols_input[1][2] .=> dataset[1][:, 2:end]')
+symbols_input[1][2] .= dataset[1][:, 2:end]
+for m in symbols_input
+    d[m[2]] .= dataset[i][:, 2]
+end
+d
+for i in eachindex(dataset)
+    dataset[i]
+    # depvars[i]
+end
+
+toexpr(pde_system.eqs)
+pinnrep.
+
+@parameterst, p
+@variables u(..)
+
+Dt = Differential(t)
+eqs = Dt(u(t)) - cos(p * t) ~ 0
+bcs = [u(0) ~ 0.0]
+domains = [t ∈ Interval(0.0, 2.0)]
+
+chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
+initl, st = Lux.setup(Random.default_rng(), chainl)
+
+@named pde_system = PDESystem(eqs,
+    bcs,
+    domains,
+    [t],
+    [u(t)],
+    [p],
+    defaults = Dict([p => 4.0]))
+
+analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
+timepoints = collect(0.0:(1 / 100.0):2.0)
+u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
+u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
+dataset = [hcat(u1, timepoints)]
+
+# checking all training strategies
+discretization = BayesianPINN([chainl], GridTraining(0.01), param_estim = true,
+    dataset = [dataset, nothing])
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 1500,
+    bcstd = [0.05],
+    phystd = [0.01], l2std = [0.01],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 50.0],
+    param = [LogNormal(6.0, 0.5)], progress = true)
+Symoblics.value(a)
+ex = :(y(t) ~ x(t))
+parse_expr_to_symbolic(ex[1], Main) # gives the symbolic expression `y(t) ~ x(t)` in empty Main
+
+# Now do a whole system
+
+ex = [:(y ~ x)
+    :(y ~ -2x + 3 / z)
+    :(z ~ 2)]
+eqs = parse_expr_to_symbolic.(ex, (Main,))
+
+@variables x y z
+ex = [y ~ x
+    y ~ -2x + 3 / z
+    z ~ 2]
+all(isequal.(eqs, ex)) # true
\ No newline at end of file

From aa15410ef0533fa25da815774150a8ad47628b90 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Mon, 12 Feb 2024 19:44:39 +0530
Subject: [PATCH 017/107] removed bayesian folder

---
 src/{bayesian => }/BPINN_ode.jl        | 0
 src/NeuralPDE.jl                       | 4 ++--
 src/{bayesian => }/advancedHMC_MCMC.jl | 0
 src/{bayesian => }/collocated_estim.jl | 0
 4 files changed, 2 insertions(+), 2 deletions(-)
 rename src/{bayesian => }/BPINN_ode.jl (100%)
 rename src/{bayesian => }/advancedHMC_MCMC.jl (100%)
 rename src/{bayesian => }/collocated_estim.jl (100%)

diff --git a/src/bayesian/BPINN_ode.jl b/src/BPINN_ode.jl
similarity index 100%
rename from src/bayesian/BPINN_ode.jl
rename to src/BPINN_ode.jl
diff --git a/src/NeuralPDE.jl b/src/NeuralPDE.jl
index 66d6e9c58f..de20f9a88f 100644
--- a/src/NeuralPDE.jl
+++ b/src/NeuralPDE.jl
@@ -50,8 +50,8 @@ include("dae_solve.jl")
 include("transform_inf_integral.jl")
 include("discretize.jl")
 include("neural_adapter.jl")
-include("bayesian/advancedHMC_MCMC.jl")
-include("bayesian/BPINN_ode.jl")
+include("advancedHMC_MCMC.jl")
+include("BPINN_ode.jl")
 include("PDE_BPINN.jl")
 include("bayesian/collocated_estim.jl")
 
diff --git a/src/bayesian/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl
similarity index 100%
rename from src/bayesian/advancedHMC_MCMC.jl
rename to src/advancedHMC_MCMC.jl
diff --git a/src/bayesian/collocated_estim.jl b/src/collocated_estim.jl
similarity index 100%
rename from src/bayesian/collocated_estim.jl
rename to src/collocated_estim.jl

From 9475d27ccdc8e22f110e2298f5c49dde9c818920 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Mon, 12 Feb 2024 19:58:41 +0530
Subject: [PATCH 018/107] cleaned files, removed DataInterpolations

---
 Project.toml                  |   1 -
 src/NeuralPDE.jl              |   3 +-
 test/BPINN_PDEinvsol_tests.jl | 229 +---------------------------------
 3 files changed, 3 insertions(+), 230 deletions(-)

diff --git a/Project.toml b/Project.toml
index 3f82c4de90..fd683ede3b 100644
--- a/Project.toml
+++ b/Project.toml
@@ -10,7 +10,6 @@ ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
 ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66"
 Cubature = "667455a9-e2ce-5579-9412-b964f529a492"
-DataInterpolations = "82cc6244-b520-54b8-b5a6-8a565e85f1d0"
 DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e"
 DiffEqNoiseProcess = "77a26b50-5914-5dd7-bc55-306e6241c503"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
diff --git a/src/NeuralPDE.jl b/src/NeuralPDE.jl
index de20f9a88f..84fb852be9 100644
--- a/src/NeuralPDE.jl
+++ b/src/NeuralPDE.jl
@@ -25,7 +25,6 @@ using Symbolics: wrap, unwrap, arguments, operation
 using SymbolicUtils
 using AdvancedHMC, LogDensityProblems, LinearAlgebra, Functors, MCMCChains
 using MonteCarloMeasurements
-using DataInterpolations: LinearInterpolation
 import ModelingToolkit: value, nameof, toexpr, build_expr, expand_derivatives
 import DomainSets: Domain, ClosedInterval
 import ModelingToolkit: Interval, infimum, supremum #,Ball
@@ -53,7 +52,7 @@ include("neural_adapter.jl")
 include("advancedHMC_MCMC.jl")
 include("BPINN_ode.jl")
 include("PDE_BPINN.jl")
-include("bayesian/collocated_estim.jl")
+include("collocated_estim.jl")
 
 export NNODE, TerminalPDEProblem, NNPDEHan, NNPDENS, NNRODE, NNDAE,
     KolmogorovPDEProblem, NNKolmogorov, NNStopping, ParamKolmogorovPDEProblem,
diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index 870a6aac53..0a23cfa669 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -205,111 +205,6 @@ u_predict = pmean(sol1.ensemblesol[1])
 @test mean(u_predict .- u_real) < 0.1
 @test sol1.estimated_de_params[1]≈param atol=param * 0.3
 
-# function get_symbols(dict_depvar_input, dataset, depvars, eqs)
-
-#     # get datasets into splattable form
-#     splat_form = [[dataset_i[:, i] for i in 1:size(dataset_i)[2]] for dataset_i in dataset]
-#     # splat datasets onto Linear interpolations tables
-#     interps = [LinearInterpolation(splat_i...) for splat_i in splat_form]
-#     # this works as order of dataset matches order of depvars
-#     interps = Dict(depvars .=> interps)
-
-#     Dict_symbol_interps = Dict(depvar => (interps[depvar], dict_depvar_input[depvar])
-#                                for depvar in depvars)
-
-#     tobe_subs = Dict()
-
-#     asrt = Symbolics.get_variables.(eqs)
-#     # want only symbols of depvars
-#     tempo = unique(reduce(vcat, asrt))[(end - length(depvars) + 1):end]
-#     # now we have all the depvars, we now need all depvars whcih can be substituted with data interps
-
-#     tobe_subs = Dict()
-#     for a in depvars
-#         for i in tempo
-#             if toexpr(i).args[1] == a
-#                 tobe_subs[a] = i
-#             end
-#         end
-#     end
-
-#     # do the same thing as above here using pinnrep.indvars
-#     to_subs = Dict()
-#     for (a, b) in Dict_symbol_interps
-#         b1, b2 = b
-#         for i in tempo
-#             if toexpr(i).args[1] == a
-#                 tobe_subs[a] = i
-#             end
-#         end
-#     end
-#     for (a, b) in Dict_symbol_interps
-#         b1, b2 = b
-#         to_subs[a] = eval(:($b1($(b2...))))
-#         # Symbol("$b1($(b2...))")
-#         # eval(:($b1($(b2...))))
-#     end
-
-#     println("to_subs : ", to_subs)
-#     println("tobe_subs : ", tobe_subs)
-#     return to_subs, tobe_subs
-# end
-
-# function recur_expression(exp, Dict_differentials)
-#     for in_exp in exp.args
-#         if !(in_exp isa Expr)
-#             # skip +,== symbols, characters etc
-#             continue
-
-#         elseif in_exp.args[1] isa ModelingToolkit.Differential
-#             # first symbol of differential term
-#             # Dict_differentials for masking differential terms
-#             # and resubstituting differentials in equations after putting in interpolations
-#             temp = eval(in_exp)
-#             # println(" inside recursion : ")
-#             # println("in_exp went from ", in_exp, " to ", temp)
-#             # println("typeof in_exp went from ", typeof(in_exp), " to ", typeof(temp))
-#             Dict_differentials[temp] = Symbol("diff_$(length(Dict_differentials)+1)")
-#             return
-
-#         else
-#             recur_expression(in_exp, Dict_differentials)
-#         end
-#     end
-# end
-
-# get datafree loss functions for new loss type
-# need to call merge_strategy_with_loss_function() variant after this
-function merge_dataset_with_loss_function(pinnrep::NeuralPDE.PINNRepresentation,
-        dataset,
-        datafree_pde_loss_function,
-        datafree_bc_loss_function)
-    @unpack domains, eqs, bcs, dict_indvars, dict_depvars, flat_init_params = pinnrep
-
-    eltypeθ = eltype(pinnrep.flat_init_params)
-
-    train_sets = [[dataset[i][:, 2] for i in eachindex(dataset)], [[0;;], [0;;], [0;;]]]
-
-    # the points in the domain and on the boundary
-    pde_train_sets, bcs_train_sets = train_sets
-    # pde_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)),
-    #     pde_train_sets)
-    # bcs_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)),
-    #     bcs_train_sets)
-    pde_loss_functions = [get_loss_function(_loss, _set, eltypeθ)
-                          for (_loss, _set) in zip(datafree_pde_loss_function,
-        pde_train_sets)]
-
-    bc_loss_functions = [get_loss_function(_loss, _set, eltypeθ)
-                         for (_loss, _set) in zip(datafree_bc_loss_function, bcs_train_sets)]
-
-    pde_loss_functions, bc_loss_functions
-end
-
-function get_loss_function(loss_function, train_set, eltypeθ; τ = nothing)
-    loss = (θ) -> mean(abs2, loss_function(train_set, θ))
-end
-
 # for bc case, [bc]/bc eqs must be passed along with dataset_bc[i]
 # and final loss for bc must be together in a vector(bcs has seperate type of dataset_bc)
 # eqs is vector of pde eqs and dataset here is dataset_pde
@@ -340,36 +235,6 @@ function get_loss_2(pinnrep, dataset, eqs)
         pinnrep.pde_indvars[i]) for i in eachindex(eqs)]
 end
 
-# -----------------===============
-eqs
-a = substitute.(eqs, Ref(Dict(t => 1)))
-
-# after masking
-# this can remove interpolations need
-b = substitute.(eqs, Ref(Dict(u(t) => interp([1]...))))
-
-toexpr(a[1]).args[2].args[2].args[2](3)
-Symbol("$(u)")
-
-interp = LinearInterpolation([1, 2], [1, 23])
-typeof(interp)
-LinearInterpolation{Vector{Int64}, Vector{Int64}, true, Int64}
-
-typeof(interp(t))
-SymbolicUtils.BasicSymbolic{Real}
-interp_vars = [t]
-interp(interp_vars...)
-arg = pinnrep.dict_depvar_input[:u]
-arg = [g, l]
-pinnrep.indvars
-@parameters (arg...)
-eval(:($interp($(arg...))))
-b = substitute(a, Dict(t => 1))
-@parameters aa[1:2]
-aa = [m, l]
-l
-m
-
 # >why not mask differential
 function get_lossy(pinnrep, dataset, eqs)
     depvars = pinnrep.depvars # order is same as dataset and interps
@@ -415,37 +280,6 @@ function get_lossy(pinnrep, dataset, eqs)
     # return loss_functions
 end
 
-# finally dataset to be fed
-# train sets format [[],[]]
-pinnrep.pde_indvars
-pinnrep = NeuralPDE.symbolic_discretize(pde_system, discretization)
-eqs = pinnrep.eqs
-yuh1 = get_lossy(pinnrep, dataset, eqs)
-pde_loss_functions = [NeuralPDE.merge_strategy_with_loglikelihood_function(pinnrep,
-    GridTraining(0.1),
-    yuh1[i],
-    nothing; train_sets_pde = [data_pde[i, :] for data_pde in dataset],
-    train_sets_bc = nothing)[1]
-                      for i in eachindex(yuh1)]
-function L2_loss2(θ, allstd)
-    stdpdes, stdbcs, stdextra = allstd
-    pde_loglikelihoods = [[logpdf(Normal(0, 0.8 * stdpdes[i]), pde_loss_function(θ))
-                           for (i, pde_loss_function) in enumerate(pde_loss_functions[i])]
-                          for i in eachindex(pde_loss_functions)]
-
-    # bc_loglikelihoods = [logpdf(Normal(0, stdbcs[j]), bc_loss_function(θ))
-    #                      for (j, bc_loss_function) in enumerate(bc_loss_functions)]
-    # println("bc_loglikelihoods : ", bc_loglikelihoods)
-    return sum(sum(pde_loglikelihoods))
-    # sum(sum(pde_loglikelihoods) + sum(bc_loglikelihoods))
-end
-
-L2_loss2([1, 2, 3, 4], [1, 1, 1])
-
-[NeuralPDE.parse_equation(pinnrep, exa) for exa in exam]
-a = "diff_1"
-substitute(a * u(t, x) - cos(p * t) ~ 0, Dict(u(t, x) => 1.0))
-substitute(eqs[1], Dict(u(t, x) => 1.0))
 # dataset_pde has normal matrix format 
 # dataset_bc has format of Vector{typeof(dataset_pde )} as each bc has different domain requirements
 function get_symbols(dict_depvar_input, dataset, depvars, eqs)
@@ -507,10 +341,6 @@ end
 pop1
 a = substitute(eqs, pop1)
 
-transpose(dataset[1])
-pde_system.eqs
-pde_system.bcs
-eqs = pde_system.eqs
 Symbolics.get_variables(eqs[1])
 # eqs=a
 
@@ -543,46 +373,7 @@ toexpr(pde_system.eqs[1]).args[2].args[3].args[3]
 # logic to contrauct dict to feed for masking
 # Dict(interps[depvar] => dict_depvar_input[depvar] for depvar in depvars)
 
-# what do i want?
-# > what do i have?
-# i have a dataset of depvars and corresponding indvars values
-# i want for each equation indvars - get_variables()
-# construct physics losses based on above list and dataset values
-# dataset - dict_depvars_input construct
-# use this on dataset
-
-# from pinnrep and dataset gives eqaution wise datasets
-symbols_input = [(i, pinnrep.dict_depvar_input[i]) for i in pinnrep.depvars]
-eq_args = NeuralPDE.get_argument(eqs, pinnrep.dict_indvars, pinnrep.dict_depvars)
-points = []
-for eq_arg in eq_args
-    a = []
-    for i in eachindex(symbols_input)
-        if symbols_input[i][2] == eq_arg
-            push!(a, dataset[i][2:end])
-        end
-    end
-    push!(points, a)
-end
-typeof(points[1])
-
-d = Dict()
-dataset[1][:, 2:end]'
-Dict(symbols_input[1][2] .=> dataset[1][:, 2:end]')
-symbols_input[1][2] .= dataset[1][:, 2:end]
-for m in symbols_input
-    d[m[2]] .= dataset[i][:, 2]
-end
-d
-for i in eachindex(dataset)
-    dataset[i]
-    # depvars[i]
-end
-
-toexpr(pde_system.eqs)
-pinnrep.
-
-@parameterst, p
+@parameters t, p
 @variables u(..)
 
 Dt = Differential(t)
@@ -618,20 +409,4 @@ sol1 = ahmc_bayesian_pinn_pde(pde_system,
     phystd = [0.01], l2std = [0.01],
     priorsNNw = (0.0, 1.0),
     saveats = [1 / 50.0],
-    param = [LogNormal(6.0, 0.5)], progress = true)
-Symoblics.value(a)
-ex = :(y(t) ~ x(t))
-parse_expr_to_symbolic(ex[1], Main) # gives the symbolic expression `y(t) ~ x(t)` in empty Main
-
-# Now do a whole system
-
-ex = [:(y ~ x)
-    :(y ~ -2x + 3 / z)
-    :(z ~ 2)]
-eqs = parse_expr_to_symbolic.(ex, (Main,))
-
-@variables x y z
-ex = [y ~ x
-    y ~ -2x + 3 / z
-    z ~ 2]
-all(isequal.(eqs, ex)) # true
\ No newline at end of file
+    param = [LogNormal(6.0, 0.5)], progress = true)
\ No newline at end of file

From b50989a95f60c37c497fc4bb9e9cfe1eb30bfb3c Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Thu, 15 Feb 2024 02:52:44 +0530
Subject: [PATCH 019/107] done with implementation

---
 src/PDE_BPINN.jl              |  72 ++-----
 src/discretize.jl             |   8 +-
 src/training_strategies.jl    |  32 ++-
 test/BPINN_PDEinvsol_tests.jl | 362 ++++++++++++----------------------
 4 files changed, 163 insertions(+), 311 deletions(-)

diff --git a/src/PDE_BPINN.jl b/src/PDE_BPINN.jl
index d53110e20d..7cbe22a804 100644
--- a/src/PDE_BPINN.jl
+++ b/src/PDE_BPINN.jl
@@ -67,53 +67,36 @@ end
 # and final loss for bc must be together in a vector(bcs has seperate type of dataset_bc)
 # eqs is vector of pde eqs and dataset here is dataset_pde
 # normally you get vector of losses
-function get_lossy(pinnrep, dataset, eqs)
+function get_lossy(pinnrep, dataset, Dict_differentials)
+    eqs = pinnrep.eqs
     depvars = pinnrep.depvars # order is same as dataset and interps
-    dict_depvar_input = pinnrep.dict_depvar_input
-
-    Dict_differentials0 = Dict()
-    exp = toexpr(eqs)
-    Symbolics.variable.(hcat(pinnrep.indvars, pinnrep.depvars))
-    for exp_i in exp
-        recur_expression(exp_i, Dict_differentials0)
-    end
-    # Dict_differentials is now filled with Differential operator => diff_i key-value pairs
-
-    Dict_differentials = Dict()
-    for (a, b) in Dict_differentials0
-        # println(eval(a.args[1]))
-        # Symbolics.operation(Symbolics.value(z))
-        println(a)
-        a = Symbolics.parse_expr_to_symbolic(a, NeuralPDE)
-        Dict_differentials[a] = b
-    end
 
+    # Dict_differentials is filled with Differential operator => diff_i key-value pairs
     # masking operation
-    println("Dict_differentials : ", Dict_differentials)
-    a = substitute.(eqs, Ref(Dict_differentials))
-    println("Masked Differential term : ", a)
+    eqs_new = substitute.(eqs, Ref(Dict_differentials))
 
     to_subs, tobe_subs = get_symbols(dataset, depvars, eqs)
     # for each row in dataset create u values for substituing in equation, n_equations=n_rows
     eq_subs = [Dict(tobe_subs[depvar] => to_subs[depvar][i] for depvar in depvars)
                for i in 1:size(dataset[1][:, 1])[1]]
 
+    # for each point(eq_sub dictionary) substiute in all equations(eqs_new - masked equations)
     b = []
     for eq_sub in eq_subs
-        push!(b, [substitute(a_i, eq_sub) for a_i in a])
+        push!(b, [substitute(eq, eq_sub) for eq in eqs_new])
     end
+    # now we have vector of equation vectors
 
     # reverse dict for re-substituing values of Differential(t)(u(t)) etc
     rev_Dict_differentials = Dict(value => key for (key, value) in Dict_differentials)
 
+    # for each vector in vecvtor of equation vectorbroadcast resubstituing OG mask values
     c = []
     for b_i in b
         push!(c, substitute.(b_i, Ref(rev_Dict_differentials)))
     end
-    println("After re Substituting depvars : ", c[1])
-    # c = hcat(c...)
 
-    # get losses
+    # get losses, zip each equation with args for each build_loss call per equation vector
     loss_functions = [[build_loss_function(pinnrep, eq, pde_indvar)
                        for (eq, pde_indvar, integration_indvar) in zip(c[i],
         pinnrep.pde_indvars,
@@ -146,27 +129,6 @@ function get_symbols(dataset, depvars, eqs)
     return to_subs, tobe_subs
 end
 
-function recur_expression(exp, Dict_differentials)
-    for in_exp in exp.args
-        if !(in_exp isa Expr)
-            # skip +,== symbols, characters etc
-            continue
-
-        elseif in_exp.args[1] isa ModelingToolkit.Differential
-            # first symbol of differential term
-            # Dict_differentials for masking differential terms
-            # and resubstituting differentials in equations after putting in interpolations
-            println("starting")
-            Dict_differentials[in_exp] = Symbolics.variable("diff_$(length(Dict_differentials)+1)")
-            println("ending")
-            return
-
-        else
-            recur_expression(in_exp, Dict_differentials)
-        end
-    end
-end
-
 function LogDensityProblems.logdensity(Tar::PDELogTargetDensity, θ)
     # for parameter estimation neccesarry to use multioutput case
     return Tar.full_loglikelihood(setparameters(Tar, θ),
@@ -398,31 +360,33 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
         Adaptorkwargs = (Adaptor = StanHMCAdaptor,
             Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
         Integratorkwargs = (Integrator = Leapfrog,), saveats = [1 / 10.0],
-        numensemble = floor(Int, draw_samples / 3), progress = false, verbose = false)
+        numensemble = floor(Int, draw_samples / 3), Dict_differentials = Dict(),
+        progress = false, verbose = false)
     pinnrep = symbolic_discretize(pde_system, discretization)
     dataset_pde, dataset_bc = discretization.dataset
 
-    eqs = pinnrep.eqs
-    yuh1 = get_lossy(pinnrep, dataset_pde, eqs)
+    yuh1 = get_lossy(pinnrep, dataset_pde, Dict_differentials)
     # eqs = pinnrep.bcs
     # yuh2 = get_lossy(pinnrep, dataset_pde, eqs)
 
+    # this is a vector of tuple{vector,nothing}
     pde_loss_functions = [merge_strategy_with_loglikelihood_function(pinnrep::PINNRepresentation,
         GridTraining(0.1),
         yuh1[i],
         nothing; train_sets_pde = [data_pde[i, :] for data_pde in dataset_pde],
-        train_sets_bc = nothing)[1]
+        train_sets_bc = nothing)
                           for i in eachindex(yuh1)]
 
     function L2_loss2(θ, allstd)
         stdpdes, stdbcs, stdextra = allstd
-        pde_loglikelihoods = [[logpdf(Normal(0, 0.8 * stdpdes[i]), pde_loss_function(θ))
-                               for (i, pde_loss_function) in enumerate(pde_loss_functions[i])]
+        # first vector of losses,from tuple -> pde losses, first[1] pde loss
+        pde_loglikelihoods = [[logpdf(Normal(0, stdpdes[j]), pde_loss_function(θ))
+                               for (j, pde_loss_function) in enumerate(pde_loss_functions[i][1])]
                               for i in eachindex(pde_loss_functions)]
 
         # bc_loglikelihoods = [logpdf(Normal(0, stdbcs[j]), bc_loss_function(θ))
         #                      for (j, bc_loss_function) in enumerate(bc_loss_functions)]
-        # println("bc_loglikelihoods : ", bc_loglikelihoods)
+
         return sum(sum(pde_loglikelihoods))
         # sum(sum(pde_loglikelihoods) + sum(bc_loglikelihoods))
     end
diff --git a/src/discretize.jl b/src/discretize.jl
index af035980b3..a6c7c3bed1 100644
--- a/src/discretize.jl
+++ b/src/discretize.jl
@@ -608,7 +608,9 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem,
 
     function get_likelihood_estimate_function(discretization::BayesianPINN)
         dataset_pde, dataset_bc = discretization.dataset
-        
+        dataset_pde = dataset_pde isa Nothing ? dataset_pde : get_dataset_train_points(eqs, dataset_pde, pinnrep)
+        dataset_bc = dataset_bc isa Nothing ? dataset_bc : get_dataset_train_points(eqs, dataset_bc, pinnrep)
+
         # required as Physics loss also needed on the discrete dataset domain points
         # data points are discrete and so by default GridTraining loss applies
         # passing placeholder dx with GridTraining, it uses data points irl
@@ -616,7 +618,9 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem,
             merge_strategy_with_loglikelihood_function(pinnrep,
                 GridTraining(0.1),
                 datafree_pde_loss_functions,
-                datafree_bc_loss_functions, train_sets_pde = dataset_pde, train_sets_bc = dataset_bc)
+                datafree_bc_loss_functions,
+                train_sets_pde = dataset_pde,
+                train_sets_bc = dataset_bc)
         else
             (nothing, nothing)
         end
diff --git a/src/training_strategies.jl b/src/training_strategies.jl
index a2e4624f23..ffec6fe2d2 100644
--- a/src/training_strategies.jl
+++ b/src/training_strategies.jl
@@ -21,15 +21,22 @@ function get_dataset_train_points(eqs, train_sets, pinnrep)
     dict_indvars = pinnrep.dict_indvars
 
     symbols_input = [(i, dict_depvar_input[i]) for i in depvars]
+    # [(:u, [:t])]
     eq_args = NeuralPDE.get_argument(eqs, dict_indvars, dict_depvars)
+    # [[:t]]
+
     points = []
     for eq_arg in eq_args
         a = []
+        # for each (depvar,[indvar1..]) if indvari==indvar (eq_arg)
         for i in eachindex(symbols_input)
             if symbols_input[i][2] == eq_arg
+                # include domain points of that depvar
+                # each loss equation take domain matrix [points..;points..]
                 push!(a, train_sets[i][:, 2:end]')
             end
         end
+        # vcat as new row for next equation
         push!(points, vcat(a...))
     end
     return points
@@ -39,19 +46,16 @@ end
 function merge_strategy_with_loglikelihood_function(pinnrep::PINNRepresentation,
         strategy::GridTraining,
         datafree_pde_loss_function,
-        datafree_bc_loss_function; train_sets_pde = nothing,train_sets_bc=nothing)
+        datafree_bc_loss_function; train_sets_pde = nothing, train_sets_bc=nothing)
     @unpack domains, eqs, bcs, dict_indvars, dict_depvars, flat_init_params = pinnrep
 
     eltypeθ = eltype(pinnrep.flat_init_params)
 
     # is vec as later each _set in pde_train_sets are columns as points transformed to vector of points (pde_train_sets must be rowwise)
     pde_loss_functions = if !(train_sets_pde isa Nothing)
-
-        pde_train_sets = get_dataset_train_points(eqs, train_sets_pde, pinnrep)
-        println(" pde train set : ", pde_train_sets)
-        println("type pde  train set : ", size(pde_train_sets))
         pde_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)),
-            pde_train_sets)
+            train_sets_pde)
+
         [get_loss_function(_loss, _set, eltypeθ, strategy)
                               for (_loss, _set) in zip(datafree_pde_loss_function,
             pde_train_sets)]
@@ -60,12 +64,9 @@ function merge_strategy_with_loglikelihood_function(pinnrep::PINNRepresentation,
     end
     
     bc_loss_functions = if !(train_sets_bc isa Nothing)
-
-        bcs_train_sets = get_dataset_train_points(bcs, train_sets_bc, pinnrep)
-        println("bcs train set : ", bcs_train_sets)
-        println("type bcs train set : ", size(bcs_train_sets))
         bcs_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)),
-        bcs_train_sets)
+            train_sets_bc)
+
         [get_loss_function(_loss, _set, eltypeθ, strategy)
                          for (_loss, _set) in zip(datafree_bc_loss_function, bcs_train_sets)]
     else
@@ -92,16 +93,13 @@ function merge_strategy_with_loss_function(pinnrep::PINNRepresentation,
                             pde_train_sets)
     bcs_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)),
                             bcs_train_sets)
+
     pde_loss_functions = [get_loss_function(_loss, _set, eltypeθ, strategy)
                           for (_loss, _set) in zip(datafree_pde_loss_function,
                                                    pde_train_sets)]
-
     bc_loss_functions = [get_loss_function(_loss, _set, eltypeθ, strategy)
-                         for (_loss, _set) in zip(datafree_bc_loss_function, bcs_train_sets)]
-                            println("pde_train_sets : ",pde_train_sets)
-                            println("pde_train_sets : ",size(pde_train_sets))
-                            println("bc_train_sets : ",bcs_train_sets)
-                            println("bc_train_sets : ",size(bcs_train_sets))
+                         for (_loss, _set) in zip(datafree_bc_loss_function,
+                                                   bcs_train_sets)]
     pde_loss_functions, bc_loss_functions
 end
 
diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index 0a23cfa669..c659874afa 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -154,159 +154,6 @@ end
     # @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
 end
 
-@parameters t, x, p
-@variables u(..)
-Dt = Differential(t)
-Dx = Differential(x)
-eqs = [u(t, x) * Dt(u(t, x)) - cos(p * t) ~ 0, u(t, x) + Dx(u(t, x)) ~ 0.0]
-bcs = [u(0, x) ~ 0.0, u(t, 10) ~ 1.0]
-domains = [t ∈ Interval(0.0, 2.0), x ∈ Interval(0.0, 2.0)]
-
-chainl = Lux.Chain(Lux.Dense(2, 6, tanh), Lux.Dense(6, 1))
-initl, st = Lux.setup(Random.default_rng(), chainl)
-
-@named pde_system = PDESystem(eqs,
-    bcs,
-    domains,
-    [t, x],
-    [u(t, x)],
-    [p],
-    defaults = Dict([p => 4.0]))
-
-analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
-timepoints = collect(0.0:(1 / 100.0):2.0)
-u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
-u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
-dataset = [hcat(u1, u1, timepoints)]
-
-# checking all training strategies
-# discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
-#     dataset = [dataset, nothing])
-
-discretization = BayesianPINN([chainl],
-    GridTraining([0.2, 0.2]),
-    param_estim = true, dataset = [dataset, nothing])
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 1500,
-    bcstd = [0.05, 0.05],
-    phystd = [0.01, 0.01], l2std = [0.01],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 50.0, 1 / 20.0],
-    param = [Normal(3.0, 0.5)], progress = true)
-
-param = 2 * π
-ts = vec(sol1.timepoints[1])
-u_real = [analytic_sol_func1(0.0, t) for t in ts]
-u_predict = pmean(sol1.ensemblesol[1])
-
-@test u_predict≈u_real atol=1.5
-@test mean(u_predict .- u_real) < 0.1
-@test sol1.estimated_de_params[1]≈param atol=param * 0.3
-
-# for bc case, [bc]/bc eqs must be passed along with dataset_bc[i]
-# and final loss for bc must be together in a vector(bcs has seperate type of dataset_bc)
-# eqs is vector of pde eqs and dataset here is dataset_pde
-# normally you get vector of losses
-function get_loss_2(pinnrep, dataset, eqs)
-    depvars = pinnrep.depvars # order is same as dataset and interps
-    dict_depvar_input = pinnrep.dict_depvar_input
-
-    to_subs, tobe_subs = get_symbols(dict_depvar_input, dataset, depvars)
-    interp_subs_dict = Dict(tobe_subs[depvar] => to_subs[depvar] for depvar in depvars)
-
-    Dict_differentials = Dict()
-    exp = toexpr(eqs)
-    void_value = [recur_expression(exp_i, Dict_differentials) for exp_i in exp]
-    # Dict_differentials is now filled with Differential operator => diff_i key-value pairs
-
-    # masking operation
-    a = substitute.(eqs, Ref(Dict_differentials))
-    println(a)
-    b = substitute.(a, Ref(interp_subs_dict))
-    println(b)
-    # reverse dict for re-substituing values of Differential(t)(u(t)) etc
-    rev_Dict_differentials = Dict(value => key for (key, value) in Dict_differentials)
-    eqs = substitute.(b, Ref(rev_Dict_differentials))
-    # get losses
-    loss_functions = [NeuralPDE.build_loss_function(pinnrep,
-        eqs[i],
-        pinnrep.pde_indvars[i]) for i in eachindex(eqs)]
-end
-
-# >why not mask differential
-function get_lossy(pinnrep, dataset, eqs)
-    depvars = pinnrep.depvars # order is same as dataset and interps
-    dict_depvar_input = pinnrep.dict_depvar_input
-
-    Dict_differentials = Dict()
-    exp = toexpr(eqs)
-    for exp_i in exp
-        recur_expression(exp_i, Dict_differentials)
-    end
-    # Dict_differentials is now filled with Differential operator => diff_i key-value pairs
-
-    # masking operation
-    println("Dict_differentials : ", Dict_differentials)
-    a = substitute.(eqs, Ref(Dict_differentials))
-    println("Masked Differential term : ", a)
-
-    to_subs, tobe_subs = get_symbols(dict_depvar_input, dataset, depvars, eqs)
-    # for each row in dataset create u values for substituing in equation, n_equations=n_rows
-    eq_subs = [Dict(tobe_subs[depvar] => to_subs[depvar][i] for depvar in depvars)
-               for i in 1:size(dataset[1][:, 1])[1]]
-
-    b = []
-    for eq_sub in eq_subs
-        push!(b, [substitute(a_i, eq_sub) for a_i in a])
-    end
-
-    # reverse dict for re-substituing values of Differential(t)(u(t)) etc
-    rev_Dict_differentials = Dict(value => key for (key, value) in Dict_differentials)
-
-    c = []
-    for b_i in b
-        push!(c, substitute.(b_i, Ref(rev_Dict_differentials)))
-    end
-    println("After re Substituting depvars : ", c[1])
-    # c = vcat(c...)
-    println(c)
-    c
-    # get losses
-    # loss_functions = [NeuralPDE.build_loss_function(pinnrep,
-    #     c[i, :][j],
-    #     pinnrep.pde_indvars[j]) for j in eachindex(pinnrep.pde_indvars)]
-    # return loss_functions
-end
-
-# dataset_pde has normal matrix format 
-# dataset_bc has format of Vector{typeof(dataset_pde )} as each bc has different domain requirements
-function get_symbols(dict_depvar_input, dataset, depvars, eqs)
-    depvar_vals = [dataset_i[:, 1] for dataset_i in dataset]
-    # order of depvars
-    to_subs = Dict(pinnrep.depvars .=> depvar_vals)
-
-    asrt = Symbolics.get_variables.(eqs)
-    # want only symbols of depvars
-    temp = unique(reduce(vcat, asrt))
-    # now we have all the depvars, we now need all depvars whcih can be substituted with data interps
-
-    tobe_subs = Dict()
-    for a in depvars
-        for i in temp
-            expr = toexpr(i)
-            if (expr isa Expr) && (expr.args[1] == a)
-                tobe_subs[a] = i
-            end
-        end
-    end
-
-    return to_subs, tobe_subs
-end
-
-yuh = get_symbols(pinnrep.dict_depvar_input, dataset, pinnrep.depvars, pinnrep.eqs)
-
 function recur_expression(exp, Dict_differentials)
     for in_exp in exp.args
         if !(in_exp isa Expr)
@@ -318,95 +165,134 @@ function recur_expression(exp, Dict_differentials)
             # Dict_differentials for masking differential terms
             # and resubstituting differentials in equations after putting in interpolations
             # temp = in_exp.args[end]
-            # in_exp.args[end] = Symbolics.variable(in_exp.args[end])
-
-            Dict_differentials[in_exp] = Symbolics.variable("diff_$(length(Dict_differentials)+1)")
+            Dict_differentials[eval(in_exp)] = Symbolics.variable("diff_$(length(Dict_differentials) + 1)")
             return
         else
             recur_expression(in_exp, Dict_differentials)
         end
     end
 end
-vars = Symbolics.variable.(hcat(pinnrep.indvars, pinnrep.depvars))
-toexpr(Differential(t)(Differential(u)(u(t))) + u(t) ~ 0).args[2]
-eqs
-# Differential(t)(u(t)) - cos(p * t) ~ 0
-exprs = toexpr(eqs)
-pop = Dict()
-recur_expression(exprs, pop)
-pop1 = Dict()
-for (a, b) in pop
-    pop1[eval(a)] = b
+
+@testset "Example 3: 2D Periodic System with New parameter estimation" begin
+    # Cos(pi*t) periodic curve
+    @parameters t, p
+    @variables u(..)
+
+    Dt = Differential(t)
+    eqs = Dt(u(t)) - cos(p * t) ~ 0
+    bcs = [u(0) ~ 0.0]
+    domains = [t ∈ Interval(0.0, 2.0)]
+
+    chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
+    initl, st = Lux.setup(Random.default_rng(), chainl)
+
+    @named pde_system = PDESystem(eqs,
+        bcs,
+        domains,
+        [t],
+        [u(t)],
+        [p],
+        defaults = Dict([p => 4.0]))
+
+    analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
+    timepoints = collect(0.0:(1 / 100.0):2.0)
+    u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
+    u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
+    dataset = [hcat(u1, timepoints)]
+
+    discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
+        dataset = [dataset, nothing])
+
+    # creating dictionary for masking equations
+    eqs = pde_system.eqs
+    Dict_differentials = Dict()
+    exps = toexpr.(eqs)
+    nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+    sol1 = ahmc_bayesian_pinn_pde(pde_system,
+        discretization;
+        draw_samples = 1500,
+        bcstd = [0.05],
+        phystd = [0.01], l2std = [0.01],
+        priorsNNw = (0.0, 1.0),
+        saveats = [1 / 50.0],
+        param = [LogNormal(6.0, 0.5)],
+        Dict_differentials = Dict_differentials)
+
+    param = 2 * π
+    ts = vec(sol1.timepoints[1])
+    u_real = [analytic_sol_func1(0.0, t) for t in ts]
+    u_predict = pmean(sol1.ensemblesol[1])
+
+    @test u_predict≈u_real atol=1.5
+    @test mean(u_predict .- u_real) < 0.1
+    @test sol1.estimated_de_params[1]≈param atol=param * 0.3
 end
-pop1
-a = substitute(eqs, pop1)
-
-Symbolics.get_variables(eqs[1])
-# eqs=a
-
-NeuralPDE.get_variables(pinnrep.eqs, pinnrep.dict_indvars, pinnrep.dict_depvars)
-NeuralPDE.get_argument(pinnrep.bcs, pinnrep.dict_indvars, pinnrep.dict_depvars)
-dx = pinnrep.strategy.dx
-eltypeθ = eltype(pinnrep.flat_init_params)
-
-# solve dataset physics loss for heterogenous case
-# create number of equations as number of interpolation and points(n rows)
-# follow masking and finally feed training sets as set in interpolations input of u(t,x,..)
-
-# logic for recursion formula to parse differentials
-# # this below has the whole differential term
-toexpr(pde_system.eqs[1]).args[2].args[3].args[3]
-# toexpr(pde_system.eqs[1]).args[2].args[3].args[3]
-# # .args[1] isa ModelingToolkit.Differential
-
-# logic for interpolation and indvars splatting to get Equation parsing terms
-# splat_form = [[dataset_i[:, i] for i in 1:size(dataset_i)[2]] for dataset_i in dataset]
-# # splat datasets onto Linear interpolations tables
-# interps = [LinearInterpolation(splat_i...) for splat_i in splat_form]
-# interps = Dict(depvars .=> interps)
-# get datasets into splattable form
-# splat_form = [[dataset_i[:, i] for i in 1:size(dataset_i)[2]] for dataset_i in dataset]
-# # splat datasets onto Linear interpolations tables
-# yu = [LinearInterpolation(splat_i...) for splat_i in splat_form]
-# Symbol(:($(yu[1])))
-
-# logic to contrauct dict to feed for masking
-# Dict(interps[depvar] => dict_depvar_input[depvar] for depvar in depvars)
-
-@parameters t, p
-@variables u(..)
-
-Dt = Differential(t)
-eqs = Dt(u(t)) - cos(p * t) ~ 0
-bcs = [u(0) ~ 0.0]
-domains = [t ∈ Interval(0.0, 2.0)]
-
-chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
-initl, st = Lux.setup(Random.default_rng(), chainl)
-
-@named pde_system = PDESystem(eqs,
-    bcs,
-    domains,
-    [t],
-    [u(t)],
-    [p],
-    defaults = Dict([p => 4.0]))
-
-analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
-timepoints = collect(0.0:(1 / 100.0):2.0)
-u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
-u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
-dataset = [hcat(u1, timepoints)]
-
-# checking all training strategies
-discretization = BayesianPINN([chainl], GridTraining(0.01), param_estim = true,
-    dataset = [dataset, nothing])
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 1500,
-    bcstd = [0.05],
-    phystd = [0.01], l2std = [0.01],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 50.0],
-    param = [LogNormal(6.0, 0.5)], progress = true)
\ No newline at end of file
+
+@testset "Example 4: Lorenz System with New parameter estimation" begin
+    @parameters t, σ_
+    @variables x(..), y(..), z(..)
+    Dt = Differential(t)
+    eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
+        Dt(y(t)) ~ x(t) * (28.0 - z(t)) - y(t),
+        Dt(z(t)) ~ x(t) * y(t) - 8 / 3 * z(t)]
+
+    bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
+    domains = [t ∈ Interval(0.0, 1.0)]
+
+    input_ = length(domains)
+    n = 7
+    chain = [
+        Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+            Lux.Dense(n, 1)),
+        Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+            Lux.Dense(n, 1)),
+        Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+            Lux.Dense(n, 1)),
+    ]
+
+    #Generate Data
+    function lorenz!(du, u, p, t)
+        du[1] = 10.0 * (u[2] - u[1])
+        du[2] = u[1] * (28.0 - u[3]) - u[2]
+        du[3] = u[1] * u[2] - (8 / 3) * u[3]
+    end
+
+    u0 = [1.0; 0.0; 0.0]
+    tspan = (0.0, 1.0)
+    prob = ODEProblem(lorenz!, u0, tspan)
+    sol = solve(prob, Tsit5(), dt = 0.01, saveat = 0.05)
+    ts = sol.t
+    us = hcat(sol.u...)
+    us = us .+ ((0.05 .* randn(size(us))) .* us)
+    ts_ = hcat(sol(ts).t...)[1, :]
+    dataset = [hcat(us[i, :], ts_) for i in 1:3]
+
+    discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true,
+        dataset = [dataset, nothing])
+
+    @named pde_system = PDESystem(eqs, bcs, domains,
+        [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]]))
+
+    # creating dictionary for masking equations
+    eqs = pde_system.eqs
+    Dict_differentials = Dict()
+    exps = toexpr.(eqs)
+    nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+    sol1 = ahmc_bayesian_pinn_pde(pde_system,
+        discretization;
+        draw_samples = 50,
+        bcstd = [0.3, 0.3, 0.3],
+        phystd = [0.1, 0.1, 0.1],
+        l2std = [1, 1, 1],
+        priorsNNw = (0.0, 1.0),
+        saveats = [0.01],
+        param = [Normal(12.0, 2)],
+        Dict_differentials = Dict_differentials)
+
+    idealp = 10.0
+    p_ = sol1.estimated_de_params[1]
+    @test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
+    # @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
+end
\ No newline at end of file

From 2fbe4a9b4765542a76d2e48e31f74525b815a922 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Thu, 15 Feb 2024 03:27:10 +0530
Subject: [PATCH 020/107] update BPINN_PDEinvsol_tests.jl

---
 test/BPINN_PDEinvsol_tests.jl | 292 +++++++++++++++++-----------------
 1 file changed, 146 insertions(+), 146 deletions(-)

diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index c659874afa..de26c0a208 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -7,152 +7,152 @@ using ComponentArrays, ModelingToolkit
 
 Random.seed!(100)
 
-@testset "Example 1: 2D Periodic System with parameter estimation" begin
-    # Cos(pi*t) periodic curve
-    @parameters t, p
-    @variables u(..)
-
-    Dt = Differential(t)
-    eqs = Dt(u(t)) - cos(p * t) ~ 0
-    bcs = [u(0) ~ 0.0]
-    domains = [t ∈ Interval(0.0, 2.0)]
-
-    chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
-    initl, st = Lux.setup(Random.default_rng(), chainl)
-
-    @named pde_system = PDESystem(eqs,
-        bcs,
-        domains,
-        [t],
-        [u(t)],
-        [p],
-        defaults = Dict([p => 4.0]))
-
-    analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
-    timepoints = collect(0.0:(1 / 100.0):2.0)
-    u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
-    u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
-    dataset = [hcat(u1, timepoints)]
-
-    # checking all training strategies
-    discretization = BayesianPINN([chainl], StochasticTraining(200), param_estim = true,
-        dataset = [dataset, nothing])
-
-    ahmc_bayesian_pinn_pde(pde_system,
-        discretization;
-        draw_samples = 1500,
-        bcstd = [0.05],
-        phystd = [0.01], l2std = [0.01],
-        priorsNNw = (0.0, 1.0),
-        saveats = [1 / 50.0],
-        param = [LogNormal(6.0, 0.5)])
-
-    discretization = BayesianPINN([chainl], QuasiRandomTraining(200), param_estim = true,
-        dataset = [dataset, nothing])
-
-    ahmc_bayesian_pinn_pde(pde_system,
-        discretization;
-        draw_samples = 1500,
-        bcstd = [0.05],
-        phystd = [0.01], l2std = [0.01],
-        priorsNNw = (0.0, 1.0),
-        saveats = [1 / 50.0],
-        param = [LogNormal(6.0, 0.5)])
-
-    discretization = BayesianPINN([chainl], QuadratureTraining(), param_estim = true,
-        dataset = [dataset, nothing])
-
-    ahmc_bayesian_pinn_pde(pde_system,
-        discretization;
-        draw_samples = 1500,
-        bcstd = [0.05],
-        phystd = [0.01], l2std = [0.01],
-        priorsNNw = (0.0, 1.0),
-        saveats = [1 / 50.0],
-        param = [LogNormal(6.0, 0.5)])
-
-    discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
-        dataset = [dataset, nothing])
-
-    sol1 = ahmc_bayesian_pinn_pde(pde_system,
-        discretization;
-        draw_samples = 1500,
-        bcstd = [0.05],
-        phystd = [0.01], l2std = [0.01],
-        priorsNNw = (0.0, 1.0),
-        saveats = [1 / 50.0],
-        param = [LogNormal(6.0, 0.5)])
-
-    param = 2 * π
-    ts = vec(sol1.timepoints[1])
-    u_real = [analytic_sol_func1(0.0, t) for t in ts]
-    u_predict = pmean(sol1.ensemblesol[1])
-
-    @test u_predict≈u_real atol=1.5
-    @test mean(u_predict .- u_real) < 0.1
-    @test sol1.estimated_de_params[1]≈param atol=param * 0.3
-end
-
-@testset "Example 2: Lorenz System with parameter estimation" begin
-    @parameters t, σ_
-    @variables x(..), y(..), z(..)
-    Dt = Differential(t)
-    eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
-        Dt(y(t)) ~ x(t) * (28.0 - z(t)) - y(t),
-        Dt(z(t)) ~ x(t) * y(t) - 8 / 3 * z(t)]
-
-    bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
-    domains = [t ∈ Interval(0.0, 1.0)]
-
-    input_ = length(domains)
-    n = 7
-    chain = [
-        Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-            Lux.Dense(n, 1)),
-        Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-            Lux.Dense(n, 1)),
-        Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-            Lux.Dense(n, 1)),
-    ]
-
-    #Generate Data
-    function lorenz!(du, u, p, t)
-        du[1] = 10.0 * (u[2] - u[1])
-        du[2] = u[1] * (28.0 - u[3]) - u[2]
-        du[3] = u[1] * u[2] - (8 / 3) * u[3]
-    end
-
-    u0 = [1.0; 0.0; 0.0]
-    tspan = (0.0, 1.0)
-    prob = ODEProblem(lorenz!, u0, tspan)
-    sol = solve(prob, Tsit5(), dt = 0.01, saveat = 0.05)
-    ts = sol.t
-    us = hcat(sol.u...)
-    us = us .+ ((0.05 .* randn(size(us))) .* us)
-    ts_ = hcat(sol(ts).t...)[1, :]
-    dataset = [hcat(us[i, :], ts_) for i in 1:3]
-
-    discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true,
-        dataset = [dataset, nothing])
-
-    @named pde_system = PDESystem(eqs, bcs, domains,
-        [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]]))
-
-    sol1 = ahmc_bayesian_pinn_pde(pde_system,
-        discretization;
-        draw_samples = 50,
-        bcstd = [0.3, 0.3, 0.3],
-        phystd = [0.1, 0.1, 0.1],
-        l2std = [1, 1, 1],
-        priorsNNw = (0.0, 1.0),
-        saveats = [0.01],
-        param = [Normal(12.0, 2)])
-
-    idealp = 10.0
-    p_ = sol1.estimated_de_params[1]
-    @test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
-    # @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
-end
+# @testset "Example 1: 2D Periodic System with parameter estimation" begin
+#     # Cos(pi*t) periodic curve
+#     @parameters t, p
+#     @variables u(..)
+
+#     Dt = Differential(t)
+#     eqs = Dt(u(t)) - cos(p * t) ~ 0
+#     bcs = [u(0) ~ 0.0]
+#     domains = [t ∈ Interval(0.0, 2.0)]
+
+#     chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
+#     initl, st = Lux.setup(Random.default_rng(), chainl)
+
+#     @named pde_system = PDESystem(eqs,
+#         bcs,
+#         domains,
+#         [t],
+#         [u(t)],
+#         [p],
+#         defaults = Dict([p => 4.0]))
+
+#     analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
+#     timepoints = collect(0.0:(1 / 100.0):2.0)
+#     u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
+#     u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
+#     dataset = [hcat(u1, timepoints)]
+
+#     # checking all training strategies
+#     discretization = BayesianPINN([chainl], StochasticTraining(200), param_estim = true,
+#         dataset = [dataset, nothing])
+
+#     ahmc_bayesian_pinn_pde(pde_system,
+#         discretization;
+#         draw_samples = 1500,
+#         bcstd = [0.05],
+#         phystd = [0.01], l2std = [0.01],
+#         priorsNNw = (0.0, 1.0),
+#         saveats = [1 / 50.0],
+#         param = [LogNormal(6.0, 0.5)])
+
+#     discretization = BayesianPINN([chainl], QuasiRandomTraining(200), param_estim = true,
+#         dataset = [dataset, nothing])
+
+#     ahmc_bayesian_pinn_pde(pde_system,
+#         discretization;
+#         draw_samples = 1500,
+#         bcstd = [0.05],
+#         phystd = [0.01], l2std = [0.01],
+#         priorsNNw = (0.0, 1.0),
+#         saveats = [1 / 50.0],
+#         param = [LogNormal(6.0, 0.5)])
+
+#     discretization = BayesianPINN([chainl], QuadratureTraining(), param_estim = true,
+#         dataset = [dataset, nothing])
+
+#     ahmc_bayesian_pinn_pde(pde_system,
+#         discretization;
+#         draw_samples = 1500,
+#         bcstd = [0.05],
+#         phystd = [0.01], l2std = [0.01],
+#         priorsNNw = (0.0, 1.0),
+#         saveats = [1 / 50.0],
+#         param = [LogNormal(6.0, 0.5)])
+
+#     discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
+#         dataset = [dataset, nothing])
+
+#     sol1 = ahmc_bayesian_pinn_pde(pde_system,
+#         discretization;
+#         draw_samples = 1500,
+#         bcstd = [0.05],
+#         phystd = [0.01], l2std = [0.01],
+#         priorsNNw = (0.0, 1.0),
+#         saveats = [1 / 50.0],
+#         param = [LogNormal(6.0, 0.5)])
+
+#     param = 2 * π
+#     ts = vec(sol1.timepoints[1])
+#     u_real = [analytic_sol_func1(0.0, t) for t in ts]
+#     u_predict = pmean(sol1.ensemblesol[1])
+
+#     @test u_predict≈u_real atol=1.5
+#     @test mean(u_predict .- u_real) < 0.1
+#     @test sol1.estimated_de_params[1]≈param atol=param * 0.3
+# end
+
+# @testset "Example 2: Lorenz System with parameter estimation" begin
+#     @parameters t, σ_
+#     @variables x(..), y(..), z(..)
+#     Dt = Differential(t)
+#     eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
+#         Dt(y(t)) ~ x(t) * (28.0 - z(t)) - y(t),
+#         Dt(z(t)) ~ x(t) * y(t) - 8 / 3 * z(t)]
+
+#     bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
+#     domains = [t ∈ Interval(0.0, 1.0)]
+
+#     input_ = length(domains)
+#     n = 7
+#     chain = [
+#         Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+#             Lux.Dense(n, 1)),
+#         Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+#             Lux.Dense(n, 1)),
+#         Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+#             Lux.Dense(n, 1)),
+#     ]
+
+#     #Generate Data
+#     function lorenz!(du, u, p, t)
+#         du[1] = 10.0 * (u[2] - u[1])
+#         du[2] = u[1] * (28.0 - u[3]) - u[2]
+#         du[3] = u[1] * u[2] - (8 / 3) * u[3]
+#     end
+
+#     u0 = [1.0; 0.0; 0.0]
+#     tspan = (0.0, 1.0)
+#     prob = ODEProblem(lorenz!, u0, tspan)
+#     sol = solve(prob, Tsit5(), dt = 0.01, saveat = 0.05)
+#     ts = sol.t
+#     us = hcat(sol.u...)
+#     us = us .+ ((0.05 .* randn(size(us))) .* us)
+#     ts_ = hcat(sol(ts).t...)[1, :]
+#     dataset = [hcat(us[i, :], ts_) for i in 1:3]
+
+#     discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true,
+#         dataset = [dataset, nothing])
+
+#     @named pde_system = PDESystem(eqs, bcs, domains,
+#         [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]]))
+
+#     sol1 = ahmc_bayesian_pinn_pde(pde_system,
+#         discretization;
+#         draw_samples = 50,
+#         bcstd = [0.3, 0.3, 0.3],
+#         phystd = [0.1, 0.1, 0.1],
+#         l2std = [1, 1, 1],
+#         priorsNNw = (0.0, 1.0),
+#         saveats = [0.01],
+#         param = [Normal(12.0, 2)])
+
+#     idealp = 10.0
+#     p_ = sol1.estimated_de_params[1]
+#     @test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
+#     # @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
+# end
 
 function recur_expression(exp, Dict_differentials)
     for in_exp in exp.args

From e0028028f63fa1a343e05bf8021c9f001b8130ba Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Thu, 15 Feb 2024 10:42:52 +0530
Subject: [PATCH 021/107] spellings, newloss now optional

---
 src/PDE_BPINN.jl              |  74 +++++----
 src/collocated_estim.jl       |   2 +-
 test/BPINN_PDE_tests.jl       |   2 +-
 test/BPINN_PDEinvsol_tests.jl | 292 +++++++++++++++++-----------------
 4 files changed, 188 insertions(+), 182 deletions(-)

diff --git a/src/PDE_BPINN.jl b/src/PDE_BPINN.jl
index 7cbe22a804..a223c58e20 100644
--- a/src/PDE_BPINN.jl
+++ b/src/PDE_BPINN.jl
@@ -64,7 +64,7 @@ mutable struct PDELogTargetDensity{
 end
 
 # for bc case, [bc]/bc eqs must be passed along with dataset_bc[i]
-# and final loss for bc must be together in a vector(bcs has seperate type of dataset_bc)
+# and final loss for bc must be together in a vector(bcs has separate type of dataset_bc)
 # eqs is vector of pde eqs and dataset here is dataset_pde
 # normally you get vector of losses
 function get_lossy(pinnrep, dataset, Dict_differentials)
@@ -76,7 +76,7 @@ function get_lossy(pinnrep, dataset, Dict_differentials)
     eqs_new = substitute.(eqs, Ref(Dict_differentials))
 
     to_subs, tobe_subs = get_symbols(dataset, depvars, eqs)
-    # for each row in dataset create u values for substituing in equation, n_equations=n_rows
+    # for each row in dataset create u values for substituting in equation, n_equations=n_rows
     eq_subs = [Dict(tobe_subs[depvar] => to_subs[depvar][i] for depvar in depvars)
                for i in 1:size(dataset[1][:, 1])[1]]
 
@@ -87,10 +87,10 @@ function get_lossy(pinnrep, dataset, Dict_differentials)
     end
     # now we have vector of equation vectors
 
-    # reverse dict for re-substituing values of Differential(t)(u(t)) etc
+    # reverse dict for re-substituting values of Differential(t)(u(t)) etc
     rev_Dict_differentials = Dict(value => key for (key, value) in Dict_differentials)
 
-    # for each vector in vecvtor of equation vectorbroadcast resubstituing OG mask values
+    # for each vector in vector of equation vectorbroadcast resubstituing OG mask values
     c = []
     for b_i in b
         push!(c, substitute.(b_i, Ref(rev_Dict_differentials)))
@@ -131,9 +131,11 @@ end
 
 function LogDensityProblems.logdensity(Tar::PDELogTargetDensity, θ)
     # for parameter estimation neccesarry to use multioutput case
-    return Tar.full_loglikelihood(setparameters(Tar, θ),
-               Tar.allstd) + priorlogpdf(Tar, θ) + L2LossData(Tar, θ) +
-           Tar.L2_loss2(setparameters(Tar, θ), Tar.allstd)
+    if Tar.L2_loss2 isa Nothing
+        return Tar.full_loglikelihood(setparameters(Tar, θ),Tar.allstd) + priorlogpdf(Tar, θ) + L2LossData(Tar, θ)
+    else
+        return Tar.full_loglikelihood(setparameters(Tar, θ), Tar.allstd) + priorlogpdf(Tar, θ) + L2LossData(Tar, θ) + Tar.L2_loss2(setparameters(Tar, θ), Tar.allstd)
+    end
 end
 
 function setparameters(Tar::PDELogTargetDensity, θ)
@@ -360,38 +362,42 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
         Adaptorkwargs = (Adaptor = StanHMCAdaptor,
             Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
         Integratorkwargs = (Integrator = Leapfrog,), saveats = [1 / 10.0],
-        numensemble = floor(Int, draw_samples / 3), Dict_differentials = Dict(),
+        numensemble = floor(Int, draw_samples / 3), Dict_differentials = nothing,
         progress = false, verbose = false)
     pinnrep = symbolic_discretize(pde_system, discretization)
     dataset_pde, dataset_bc = discretization.dataset
 
-    yuh1 = get_lossy(pinnrep, dataset_pde, Dict_differentials)
-    # eqs = pinnrep.bcs
-    # yuh2 = get_lossy(pinnrep, dataset_pde, eqs)
-
-    # this is a vector of tuple{vector,nothing}
-    pde_loss_functions = [merge_strategy_with_loglikelihood_function(pinnrep::PINNRepresentation,
-        GridTraining(0.1),
-        yuh1[i],
-        nothing; train_sets_pde = [data_pde[i, :] for data_pde in dataset_pde],
-        train_sets_bc = nothing)
-                          for i in eachindex(yuh1)]
-
-    function L2_loss2(θ, allstd)
-        stdpdes, stdbcs, stdextra = allstd
-        # first vector of losses,from tuple -> pde losses, first[1] pde loss
-        pde_loglikelihoods = [[logpdf(Normal(0, stdpdes[j]), pde_loss_function(θ))
-                               for (j, pde_loss_function) in enumerate(pde_loss_functions[i][1])]
-                              for i in eachindex(pde_loss_functions)]
-
-        # bc_loglikelihoods = [logpdf(Normal(0, stdbcs[j]), bc_loss_function(θ))
-        #                      for (j, bc_loss_function) in enumerate(bc_loss_functions)]
-
-        return sum(sum(pde_loglikelihoods))
-        # sum(sum(pde_loglikelihoods) + sum(bc_loglikelihoods))
+    newloss = if Dict_differentials isa Nothing
+        nothing
+    else
+        yuh1 = get_lossy(pinnrep, dataset_pde, Dict_differentials)
+        # eqs = pinnrep.bcs
+        # yuh2 = get_lossy(pinnrep, dataset_pde, eqs)
+
+        # this is a vector of tuple{vector,nothing}
+        pde_loss_functions = [merge_strategy_with_loglikelihood_function(pinnrep::PINNRepresentation,
+            GridTraining(0.1),
+            yuh1[i],
+            nothing; train_sets_pde = [data_pde[i, :] for data_pde in dataset_pde],
+            train_sets_bc = nothing)
+                              for i in eachindex(yuh1)]
+
+        function L2_loss2(θ, allstd)
+            stdpdes, stdbcs, stdextra = allstd
+            # first vector of losses,from tuple -> pde losses, first[1] pde loss
+            pde_loglikelihoods = [[logpdf(Normal(0, stdpdes[j]), pde_loss_function(θ))
+                                   for (j, pde_loss_function) in enumerate(pde_loss_functions[i][1])]
+                                  for i in eachindex(pde_loss_functions)]
+
+            # bc_loglikelihoods = [logpdf(Normal(0, stdbcs[j]), bc_loss_function(θ))
+            #                      for (j, bc_loss_function) in enumerate(bc_loss_functions)]
+
+            return sum(sum(pde_loglikelihoods))
+            # sum(sum(pde_loglikelihoods) + sum(bc_loglikelihoods))
+        end
     end
 
-    # WIP split dataset to respective equations
+    # [WIP] add overall functionality for BC dataset points
     if ((dataset_bc isa Nothing) && (dataset_pde isa Nothing))
         dataset = nothing
     elseif dataset_bc isa Nothing
@@ -468,7 +474,7 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
         ninv,
         initial_nnθ,
         full_weighted_loglikelihood,
-        L2_loss2,
+        newloss,
         Φ)
 
     Adaptor, Metric, targetacceptancerate = Adaptorkwargs[:Adaptor],
diff --git a/src/collocated_estim.jl b/src/collocated_estim.jl
index b113b76f12..a2f81b3ed9 100644
--- a/src/collocated_estim.jl
+++ b/src/collocated_estim.jl
@@ -175,7 +175,7 @@ function calculate_derivatives(dataset)
     # # interp = BSplineApprox(u, t, 4, 10, :Uniform, :Uniform)
     # interp = CubicSpline(u, t)
     # interp1 = CubicSpline(u1, t)
-    # # derrivatives interpolation
+    # # derivatives interpolation
     # dx = t[2] - t[1]
     # time = collect(t[1]:dx:t[end])
     # smoothu = [interp(i) for i in time]
diff --git a/test/BPINN_PDE_tests.jl b/test/BPINN_PDE_tests.jl
index cd8f6ef466..6dd3637f5a 100644
--- a/test/BPINN_PDE_tests.jl
+++ b/test/BPINN_PDE_tests.jl
@@ -205,4 +205,4 @@ end
     u_real = vec([analytic_sol_func(t) for t in ts])
     u_predict = pmean(sol1.ensemblesol[1])
     @test u_predict≈u_real atol=0.8
-end
\ No newline at end of file
+end
diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index de26c0a208..c659874afa 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -7,152 +7,152 @@ using ComponentArrays, ModelingToolkit
 
 Random.seed!(100)
 
-# @testset "Example 1: 2D Periodic System with parameter estimation" begin
-#     # Cos(pi*t) periodic curve
-#     @parameters t, p
-#     @variables u(..)
-
-#     Dt = Differential(t)
-#     eqs = Dt(u(t)) - cos(p * t) ~ 0
-#     bcs = [u(0) ~ 0.0]
-#     domains = [t ∈ Interval(0.0, 2.0)]
-
-#     chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
-#     initl, st = Lux.setup(Random.default_rng(), chainl)
-
-#     @named pde_system = PDESystem(eqs,
-#         bcs,
-#         domains,
-#         [t],
-#         [u(t)],
-#         [p],
-#         defaults = Dict([p => 4.0]))
-
-#     analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
-#     timepoints = collect(0.0:(1 / 100.0):2.0)
-#     u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
-#     u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
-#     dataset = [hcat(u1, timepoints)]
-
-#     # checking all training strategies
-#     discretization = BayesianPINN([chainl], StochasticTraining(200), param_estim = true,
-#         dataset = [dataset, nothing])
-
-#     ahmc_bayesian_pinn_pde(pde_system,
-#         discretization;
-#         draw_samples = 1500,
-#         bcstd = [0.05],
-#         phystd = [0.01], l2std = [0.01],
-#         priorsNNw = (0.0, 1.0),
-#         saveats = [1 / 50.0],
-#         param = [LogNormal(6.0, 0.5)])
-
-#     discretization = BayesianPINN([chainl], QuasiRandomTraining(200), param_estim = true,
-#         dataset = [dataset, nothing])
-
-#     ahmc_bayesian_pinn_pde(pde_system,
-#         discretization;
-#         draw_samples = 1500,
-#         bcstd = [0.05],
-#         phystd = [0.01], l2std = [0.01],
-#         priorsNNw = (0.0, 1.0),
-#         saveats = [1 / 50.0],
-#         param = [LogNormal(6.0, 0.5)])
-
-#     discretization = BayesianPINN([chainl], QuadratureTraining(), param_estim = true,
-#         dataset = [dataset, nothing])
-
-#     ahmc_bayesian_pinn_pde(pde_system,
-#         discretization;
-#         draw_samples = 1500,
-#         bcstd = [0.05],
-#         phystd = [0.01], l2std = [0.01],
-#         priorsNNw = (0.0, 1.0),
-#         saveats = [1 / 50.0],
-#         param = [LogNormal(6.0, 0.5)])
-
-#     discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
-#         dataset = [dataset, nothing])
-
-#     sol1 = ahmc_bayesian_pinn_pde(pde_system,
-#         discretization;
-#         draw_samples = 1500,
-#         bcstd = [0.05],
-#         phystd = [0.01], l2std = [0.01],
-#         priorsNNw = (0.0, 1.0),
-#         saveats = [1 / 50.0],
-#         param = [LogNormal(6.0, 0.5)])
-
-#     param = 2 * π
-#     ts = vec(sol1.timepoints[1])
-#     u_real = [analytic_sol_func1(0.0, t) for t in ts]
-#     u_predict = pmean(sol1.ensemblesol[1])
-
-#     @test u_predict≈u_real atol=1.5
-#     @test mean(u_predict .- u_real) < 0.1
-#     @test sol1.estimated_de_params[1]≈param atol=param * 0.3
-# end
-
-# @testset "Example 2: Lorenz System with parameter estimation" begin
-#     @parameters t, σ_
-#     @variables x(..), y(..), z(..)
-#     Dt = Differential(t)
-#     eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
-#         Dt(y(t)) ~ x(t) * (28.0 - z(t)) - y(t),
-#         Dt(z(t)) ~ x(t) * y(t) - 8 / 3 * z(t)]
-
-#     bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
-#     domains = [t ∈ Interval(0.0, 1.0)]
-
-#     input_ = length(domains)
-#     n = 7
-#     chain = [
-#         Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-#             Lux.Dense(n, 1)),
-#         Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-#             Lux.Dense(n, 1)),
-#         Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-#             Lux.Dense(n, 1)),
-#     ]
-
-#     #Generate Data
-#     function lorenz!(du, u, p, t)
-#         du[1] = 10.0 * (u[2] - u[1])
-#         du[2] = u[1] * (28.0 - u[3]) - u[2]
-#         du[3] = u[1] * u[2] - (8 / 3) * u[3]
-#     end
-
-#     u0 = [1.0; 0.0; 0.0]
-#     tspan = (0.0, 1.0)
-#     prob = ODEProblem(lorenz!, u0, tspan)
-#     sol = solve(prob, Tsit5(), dt = 0.01, saveat = 0.05)
-#     ts = sol.t
-#     us = hcat(sol.u...)
-#     us = us .+ ((0.05 .* randn(size(us))) .* us)
-#     ts_ = hcat(sol(ts).t...)[1, :]
-#     dataset = [hcat(us[i, :], ts_) for i in 1:3]
-
-#     discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true,
-#         dataset = [dataset, nothing])
-
-#     @named pde_system = PDESystem(eqs, bcs, domains,
-#         [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]]))
-
-#     sol1 = ahmc_bayesian_pinn_pde(pde_system,
-#         discretization;
-#         draw_samples = 50,
-#         bcstd = [0.3, 0.3, 0.3],
-#         phystd = [0.1, 0.1, 0.1],
-#         l2std = [1, 1, 1],
-#         priorsNNw = (0.0, 1.0),
-#         saveats = [0.01],
-#         param = [Normal(12.0, 2)])
-
-#     idealp = 10.0
-#     p_ = sol1.estimated_de_params[1]
-#     @test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
-#     # @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
-# end
+@testset "Example 1: 2D Periodic System with parameter estimation" begin
+    # Cos(pi*t) periodic curve
+    @parameters t, p
+    @variables u(..)
+
+    Dt = Differential(t)
+    eqs = Dt(u(t)) - cos(p * t) ~ 0
+    bcs = [u(0) ~ 0.0]
+    domains = [t ∈ Interval(0.0, 2.0)]
+
+    chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
+    initl, st = Lux.setup(Random.default_rng(), chainl)
+
+    @named pde_system = PDESystem(eqs,
+        bcs,
+        domains,
+        [t],
+        [u(t)],
+        [p],
+        defaults = Dict([p => 4.0]))
+
+    analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
+    timepoints = collect(0.0:(1 / 100.0):2.0)
+    u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
+    u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
+    dataset = [hcat(u1, timepoints)]
+
+    # checking all training strategies
+    discretization = BayesianPINN([chainl], StochasticTraining(200), param_estim = true,
+        dataset = [dataset, nothing])
+
+    ahmc_bayesian_pinn_pde(pde_system,
+        discretization;
+        draw_samples = 1500,
+        bcstd = [0.05],
+        phystd = [0.01], l2std = [0.01],
+        priorsNNw = (0.0, 1.0),
+        saveats = [1 / 50.0],
+        param = [LogNormal(6.0, 0.5)])
+
+    discretization = BayesianPINN([chainl], QuasiRandomTraining(200), param_estim = true,
+        dataset = [dataset, nothing])
+
+    ahmc_bayesian_pinn_pde(pde_system,
+        discretization;
+        draw_samples = 1500,
+        bcstd = [0.05],
+        phystd = [0.01], l2std = [0.01],
+        priorsNNw = (0.0, 1.0),
+        saveats = [1 / 50.0],
+        param = [LogNormal(6.0, 0.5)])
+
+    discretization = BayesianPINN([chainl], QuadratureTraining(), param_estim = true,
+        dataset = [dataset, nothing])
+
+    ahmc_bayesian_pinn_pde(pde_system,
+        discretization;
+        draw_samples = 1500,
+        bcstd = [0.05],
+        phystd = [0.01], l2std = [0.01],
+        priorsNNw = (0.0, 1.0),
+        saveats = [1 / 50.0],
+        param = [LogNormal(6.0, 0.5)])
+
+    discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
+        dataset = [dataset, nothing])
+
+    sol1 = ahmc_bayesian_pinn_pde(pde_system,
+        discretization;
+        draw_samples = 1500,
+        bcstd = [0.05],
+        phystd = [0.01], l2std = [0.01],
+        priorsNNw = (0.0, 1.0),
+        saveats = [1 / 50.0],
+        param = [LogNormal(6.0, 0.5)])
+
+    param = 2 * π
+    ts = vec(sol1.timepoints[1])
+    u_real = [analytic_sol_func1(0.0, t) for t in ts]
+    u_predict = pmean(sol1.ensemblesol[1])
+
+    @test u_predict≈u_real atol=1.5
+    @test mean(u_predict .- u_real) < 0.1
+    @test sol1.estimated_de_params[1]≈param atol=param * 0.3
+end
+
+@testset "Example 2: Lorenz System with parameter estimation" begin
+    @parameters t, σ_
+    @variables x(..), y(..), z(..)
+    Dt = Differential(t)
+    eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
+        Dt(y(t)) ~ x(t) * (28.0 - z(t)) - y(t),
+        Dt(z(t)) ~ x(t) * y(t) - 8 / 3 * z(t)]
+
+    bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
+    domains = [t ∈ Interval(0.0, 1.0)]
+
+    input_ = length(domains)
+    n = 7
+    chain = [
+        Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+            Lux.Dense(n, 1)),
+        Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+            Lux.Dense(n, 1)),
+        Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+            Lux.Dense(n, 1)),
+    ]
+
+    #Generate Data
+    function lorenz!(du, u, p, t)
+        du[1] = 10.0 * (u[2] - u[1])
+        du[2] = u[1] * (28.0 - u[3]) - u[2]
+        du[3] = u[1] * u[2] - (8 / 3) * u[3]
+    end
+
+    u0 = [1.0; 0.0; 0.0]
+    tspan = (0.0, 1.0)
+    prob = ODEProblem(lorenz!, u0, tspan)
+    sol = solve(prob, Tsit5(), dt = 0.01, saveat = 0.05)
+    ts = sol.t
+    us = hcat(sol.u...)
+    us = us .+ ((0.05 .* randn(size(us))) .* us)
+    ts_ = hcat(sol(ts).t...)[1, :]
+    dataset = [hcat(us[i, :], ts_) for i in 1:3]
+
+    discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true,
+        dataset = [dataset, nothing])
+
+    @named pde_system = PDESystem(eqs, bcs, domains,
+        [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]]))
+
+    sol1 = ahmc_bayesian_pinn_pde(pde_system,
+        discretization;
+        draw_samples = 50,
+        bcstd = [0.3, 0.3, 0.3],
+        phystd = [0.1, 0.1, 0.1],
+        l2std = [1, 1, 1],
+        priorsNNw = (0.0, 1.0),
+        saveats = [0.01],
+        param = [Normal(12.0, 2)])
+
+    idealp = 10.0
+    p_ = sol1.estimated_de_params[1]
+    @test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
+    # @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
+end
 
 function recur_expression(exp, Dict_differentials)
     for in_exp in exp.args

From b26a75bc4aacff6ec6a38de04d5eb04c3b902cb9 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Thu, 15 Feb 2024 12:44:09 +0530
Subject: [PATCH 022/107] update PDE_BPINN.jl

---
 src/PDE_BPINN.jl | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/src/PDE_BPINN.jl b/src/PDE_BPINN.jl
index a223c58e20..ec17257ef3 100644
--- a/src/PDE_BPINN.jl
+++ b/src/PDE_BPINN.jl
@@ -132,9 +132,12 @@ end
 function LogDensityProblems.logdensity(Tar::PDELogTargetDensity, θ)
     # for parameter estimation neccesarry to use multioutput case
     if Tar.L2_loss2 isa Nothing
-        return Tar.full_loglikelihood(setparameters(Tar, θ),Tar.allstd) + priorlogpdf(Tar, θ) + L2LossData(Tar, θ)
+        return Tar.full_loglikelihood(setparameters(Tar, θ), Tar.allstd) +
+               priorlogpdf(Tar, θ) + L2LossData(Tar, θ)
     else
-        return Tar.full_loglikelihood(setparameters(Tar, θ), Tar.allstd) + priorlogpdf(Tar, θ) + L2LossData(Tar, θ) + Tar.L2_loss2(setparameters(Tar, θ), Tar.allstd)
+        return Tar.full_loglikelihood(setparameters(Tar, θ), Tar.allstd) +
+               priorlogpdf(Tar, θ) + L2LossData(Tar, θ) +
+               Tar.L2_loss2(setparameters(Tar, θ), Tar.allstd)
     end
 end
 
@@ -489,9 +492,11 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
             ℓπ.allstd))
     @info("Current Prior Log-likelihood : ", priorlogpdf(ℓπ, initial_θ))
     @info("Current MSE against dataset Log-likelihood : ", L2LossData(ℓπ, initial_θ))
-    @info("Current L2_LOSSY : ",
-        ℓπ.L2_loss2(setparameters(ℓπ, initial_θ),
-            ℓπ.allstd))
+    if !(newloss isa Nothing)
+        @info("Current L2_LOSSY : ",
+            ℓπ.L2_loss2(setparameters(ℓπ, initial_θ),
+                ℓπ.allstd))
+    end
 
     # parallel sampling option
     if nchains != 1
@@ -550,9 +555,11 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
         @info("Current Prior Log-likelihood : ", priorlogpdf(ℓπ, samples[end]))
         @info("Current MSE against dataset Log-likelihood : ",
             L2LossData(ℓπ, samples[end]))
-        @info("Current L2_LOSSY : ",
-            ℓπ.L2_loss2(setparameters(ℓπ, samples[end]),
-                ℓπ.allstd))
+        if !(newloss isa Nothing)
+            @info("Current L2_LOSSY : ",
+                ℓπ.L2_loss2(setparameters(ℓπ, samples[end]),
+                    ℓπ.allstd))
+        end
 
         fullsolution = BPINNstats(mcmc_chain, samples, stats)
         ensemblecurves, estimnnparams, estimated_params, timepoints = inference(samples,

From f4b1bfb6c3f6b500e795461724a769695026175c Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Thu, 15 Feb 2024 15:46:01 +0530
Subject: [PATCH 023/107] removed length reweighing in BPINN ode, testset for
 recur..

---
 src/advancedHMC_MCMC.jl       |  18 ++-
 test/BPINN_PDEinvsol_tests.jl | 239 +++++++++++++++++-----------------
 2 files changed, 131 insertions(+), 126 deletions(-)

diff --git a/src/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl
index e5797e7924..ea50eabcf6 100644
--- a/src/advancedHMC_MCMC.jl
+++ b/src/advancedHMC_MCMC.jl
@@ -86,10 +86,19 @@ end
 vector_to_parameters(ps_new::AbstractVector, ps::AbstractVector) = ps_new
 
 function LogDensityProblems.logdensity(Tar::LogTargetDensity, θ)
+    # if Tar.estim_collocate
+    #     return physloglikelihood(Tar, θ) / length(Tar.dataset[1]) + priorweights(Tar, θ) +
+    #            L2LossData(Tar, θ) / length(Tar.dataset[1]) +
+    #            L2loss2(Tar, θ) / length(Tar.dataset[1])
+    # else
+    #     return physloglikelihood(Tar, θ) / length(Tar.dataset[1]) + priorweights(Tar, θ) +
+    #            L2LossData(Tar, θ) / length(Tar.dataset[1])
+    # end
     if Tar.estim_collocate
-        return physloglikelihood(Tar, θ)/length(Tar.dataset[1]) + priorweights(Tar, θ) + L2LossData(Tar, θ)/length(Tar.dataset[1]) + L2loss2(Tar, θ)/length(Tar.dataset[1])
+        return physloglikelihood(Tar, θ) + priorweights(Tar, θ) + L2LossData(Tar, θ) +
+               L2loss2(Tar, θ)
     else
-        return physloglikelihood(Tar, θ)/length(Tar.dataset[1]) + priorweights(Tar, θ) + L2LossData(Tar, θ)/length(Tar.dataset[1])
+        return physloglikelihood(Tar, θ) + priorweights(Tar, θ) + L2LossData(Tar, θ)
     end
 end
 
@@ -446,7 +455,6 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain;
     MCMCkwargs = (n_leapfrog = 30,),
     progress = false, verbose = false,
     estim_collocate = false)
-
     !(chain isa Lux.AbstractExplicitLayer) && (chain = Lux.transform(chain))
     # NN parameter prior mean and variance(PriorsNN must be a tuple)
     if isinplace(prob)
@@ -570,9 +578,9 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain;
         @info("Current Prior Log-likelihood : ", priorweights(ℓπ, samples[end]))
         @info("Current MSE against dataset Log-likelihood : ",
             L2LossData(ℓπ, samples[end]))
-            
+
         # return a chain(basic chain),samples and stats
-        matrix_samples = reshape(hcat(samples...), (length(samples[1]), length(samples), 1)) 
+        matrix_samples = reshape(hcat(samples...), (length(samples[1]), length(samples), 1))
         mcmc_chain = MCMCChains.Chains(matrix_samples)
         return mcmc_chain, samples, stats
     end
diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index c659874afa..32076a0e9e 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -173,126 +173,123 @@ function recur_expression(exp, Dict_differentials)
     end
 end
 
-@testset "Example 3: 2D Periodic System with New parameter estimation" begin
-    # Cos(pi*t) periodic curve
-    @parameters t, p
-    @variables u(..)
-
-    Dt = Differential(t)
-    eqs = Dt(u(t)) - cos(p * t) ~ 0
-    bcs = [u(0) ~ 0.0]
-    domains = [t ∈ Interval(0.0, 2.0)]
-
-    chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
-    initl, st = Lux.setup(Random.default_rng(), chainl)
-
-    @named pde_system = PDESystem(eqs,
-        bcs,
-        domains,
-        [t],
-        [u(t)],
-        [p],
-        defaults = Dict([p => 4.0]))
-
-    analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
-    timepoints = collect(0.0:(1 / 100.0):2.0)
-    u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
-    u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
-    dataset = [hcat(u1, timepoints)]
-
-    discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
-        dataset = [dataset, nothing])
-
-    # creating dictionary for masking equations
-    eqs = pde_system.eqs
-    Dict_differentials = Dict()
-    exps = toexpr.(eqs)
-    nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-    sol1 = ahmc_bayesian_pinn_pde(pde_system,
-        discretization;
-        draw_samples = 1500,
-        bcstd = [0.05],
-        phystd = [0.01], l2std = [0.01],
-        priorsNNw = (0.0, 1.0),
-        saveats = [1 / 50.0],
-        param = [LogNormal(6.0, 0.5)],
-        Dict_differentials = Dict_differentials)
-
-    param = 2 * π
-    ts = vec(sol1.timepoints[1])
-    u_real = [analytic_sol_func1(0.0, t) for t in ts]
-    u_predict = pmean(sol1.ensemblesol[1])
-
-    @test u_predict≈u_real atol=1.5
-    @test mean(u_predict .- u_real) < 0.1
-    @test sol1.estimated_de_params[1]≈param atol=param * 0.3
+println("Example 3: 2D Periodic System with New parameter estimation")
+@parameters t, p
+@variables u(..)
+
+Dt = Differential(t)
+eqs = Dt(u(t)) - cos(p * t) ~ 0
+bcs = [u(0) ~ 0.0]
+domains = [t ∈ Interval(0.0, 2.0)]
+
+chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
+initl, st = Lux.setup(Random.default_rng(), chainl)
+
+@named pde_system = PDESystem(eqs,
+    bcs,
+    domains,
+    [t],
+    [u(t)],
+    [p],
+    defaults = Dict([p => 4.0]))
+
+analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
+timepoints = collect(0.0:(1 / 100.0):2.0)
+u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
+u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
+dataset = [hcat(u1, timepoints)]
+
+discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
+    dataset = [dataset, nothing])
+
+# creating dictionary for masking equations
+eqs = pde_system.eqs
+Dict_differentials = Dict()
+exps = toexpr.(eqs)
+nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 1500,
+    bcstd = [0.05],
+    phystd = [0.01], l2std = [0.01],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 50.0],
+    param = [LogNormal(6.0, 0.5)],
+    Dict_differentials = Dict_differentials)
+
+param = 2 * π
+ts = vec(sol1.timepoints[1])
+u_real = [analytic_sol_func1(0.0, t) for t in ts]
+u_predict = pmean(sol1.ensemblesol[1])
+
+@test u_predict≈u_real atol=1.5
+@test mean(u_predict .- u_real) < 0.1
+@test sol1.estimated_de_params[1]≈param atol=param * 0.3
+
+println("Example 4: Lorenz System with New parameter estimation")
+@parameters t, σ_
+@variables x(..), y(..), z(..)
+Dt = Differential(t)
+eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
+    Dt(y(t)) ~ x(t) * (28.0 - z(t)) - y(t),
+    Dt(z(t)) ~ x(t) * y(t) - 8 / 3 * z(t)]
+
+bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
+domains = [t ∈ Interval(0.0, 1.0)]
+
+input_ = length(domains)
+n = 7
+chain = [
+    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+        Lux.Dense(n, 1)),
+    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+        Lux.Dense(n, 1)),
+    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+        Lux.Dense(n, 1)),
+]
+
+#Generate Data
+function lorenz!(du, u, p, t)
+    du[1] = 10.0 * (u[2] - u[1])
+    du[2] = u[1] * (28.0 - u[3]) - u[2]
+    du[3] = u[1] * u[2] - (8 / 3) * u[3]
 end
 
-@testset "Example 4: Lorenz System with New parameter estimation" begin
-    @parameters t, σ_
-    @variables x(..), y(..), z(..)
-    Dt = Differential(t)
-    eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
-        Dt(y(t)) ~ x(t) * (28.0 - z(t)) - y(t),
-        Dt(z(t)) ~ x(t) * y(t) - 8 / 3 * z(t)]
-
-    bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
-    domains = [t ∈ Interval(0.0, 1.0)]
-
-    input_ = length(domains)
-    n = 7
-    chain = [
-        Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-            Lux.Dense(n, 1)),
-        Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-            Lux.Dense(n, 1)),
-        Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-            Lux.Dense(n, 1)),
-    ]
-
-    #Generate Data
-    function lorenz!(du, u, p, t)
-        du[1] = 10.0 * (u[2] - u[1])
-        du[2] = u[1] * (28.0 - u[3]) - u[2]
-        du[3] = u[1] * u[2] - (8 / 3) * u[3]
-    end
-
-    u0 = [1.0; 0.0; 0.0]
-    tspan = (0.0, 1.0)
-    prob = ODEProblem(lorenz!, u0, tspan)
-    sol = solve(prob, Tsit5(), dt = 0.01, saveat = 0.05)
-    ts = sol.t
-    us = hcat(sol.u...)
-    us = us .+ ((0.05 .* randn(size(us))) .* us)
-    ts_ = hcat(sol(ts).t...)[1, :]
-    dataset = [hcat(us[i, :], ts_) for i in 1:3]
-
-    discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true,
-        dataset = [dataset, nothing])
-
-    @named pde_system = PDESystem(eqs, bcs, domains,
-        [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]]))
-
-    # creating dictionary for masking equations
-    eqs = pde_system.eqs
-    Dict_differentials = Dict()
-    exps = toexpr.(eqs)
-    nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-    sol1 = ahmc_bayesian_pinn_pde(pde_system,
-        discretization;
-        draw_samples = 50,
-        bcstd = [0.3, 0.3, 0.3],
-        phystd = [0.1, 0.1, 0.1],
-        l2std = [1, 1, 1],
-        priorsNNw = (0.0, 1.0),
-        saveats = [0.01],
-        param = [Normal(12.0, 2)],
-        Dict_differentials = Dict_differentials)
-
-    idealp = 10.0
-    p_ = sol1.estimated_de_params[1]
-    @test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
-    # @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
-end
\ No newline at end of file
+u0 = [1.0; 0.0; 0.0]
+tspan = (0.0, 1.0)
+prob = ODEProblem(lorenz!, u0, tspan)
+sol = solve(prob, Tsit5(), dt = 0.01, saveat = 0.05)
+ts = sol.t
+us = hcat(sol.u...)
+us = us .+ ((0.05 .* randn(size(us))) .* us)
+ts_ = hcat(sol(ts).t...)[1, :]
+dataset = [hcat(us[i, :], ts_) for i in 1:3]
+
+discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true,
+    dataset = [dataset, nothing])
+
+@named pde_system = PDESystem(eqs, bcs, domains,
+    [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]]))
+
+# creating dictionary for masking equations
+eqs = pde_system.eqs
+Dict_differentials = Dict()
+exps = toexpr.(eqs)
+nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 50,
+    bcstd = [0.3, 0.3, 0.3],
+    phystd = [0.1, 0.1, 0.1],
+    l2std = [1, 1, 1],
+    priorsNNw = (0.0, 1.0),
+    saveats = [0.01],
+    param = [Normal(12.0, 2)],
+    Dict_differentials = Dict_differentials)
+
+idealp = 10.0
+p_ = sol1.estimated_de_params[1]
+@test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
+# @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]

From cd01ceeb67caa3aa832cd4d32c2134c2cb7c8242 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Sat, 17 Feb 2024 01:47:58 +0530
Subject: [PATCH 024/107] corrected tests, datasetnew format

---
 src/PDE_BPINN.jl    | 16 +++++++++++-----
 src/discretize.jl   |  2 +-
 test/BPINN_Tests.jl | 20 ++++++++++----------
 3 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/src/PDE_BPINN.jl b/src/PDE_BPINN.jl
index ec17257ef3..69eaa2b733 100644
--- a/src/PDE_BPINN.jl
+++ b/src/PDE_BPINN.jl
@@ -360,7 +360,7 @@ end
 function ahmc_bayesian_pinn_pde(pde_system, discretization;
         draw_samples = 1000,
         bcstd = [0.01], l2std = [0.05],
-        phystd = [0.05], priorsNNw = (0.0, 2.0),
+        phystd = [0.05], phystdnew = [0.05], priorsNNw = (0.0, 2.0),
         param = [], nchains = 1, Kernel = HMC(0.1, 30),
         Adaptorkwargs = (Adaptor = StanHMCAdaptor,
             Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
@@ -377,18 +377,24 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
         # eqs = pinnrep.bcs
         # yuh2 = get_lossy(pinnrep, dataset_pde, eqs)
 
+        # consider all dataset domain points and for each row new set of equation loss function
         # this is a vector of tuple{vector,nothing}
         pde_loss_functions = [merge_strategy_with_loglikelihood_function(pinnrep::PINNRepresentation,
             GridTraining(0.1),
             yuh1[i],
-            nothing; train_sets_pde = [data_pde[i, :] for data_pde in dataset_pde],
+            nothing;
+            # pass transformation of each dataset row-corresponds to each point, for each depvar dataset point merged equation vector
+            train_sets_pde = get_dataset_train_points(pde_system.eqs,
+                [Array(data[i, :]') for data in dataset_pde],
+                pinnrep),
             train_sets_bc = nothing)
                               for i in eachindex(yuh1)]
 
         function L2_loss2(θ, allstd)
-            stdpdes, stdbcs, stdextra = allstd
+            stdpdesnew = allstd[4]
+
             # first vector of losses,from tuple -> pde losses, first[1] pde loss
-            pde_loglikelihoods = [[logpdf(Normal(0, stdpdes[j]), pde_loss_function(θ))
+            pde_loglikelihoods = [[logpdf(Normal(0, stdpdesnew[j]), pde_loss_function(θ))
                                    for (j, pde_loss_function) in enumerate(pde_loss_functions[i][1])]
                                   for i in eachindex(pde_loss_functions)]
 
@@ -472,7 +478,7 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
         strategy,
         dataset,
         priors,
-        [phystd, bcstd, l2std],
+        [phystd, bcstd, l2std, phystdnew],
         names,
         ninv,
         initial_nnθ,
diff --git a/src/discretize.jl b/src/discretize.jl
index a6c7c3bed1..0740544c09 100644
--- a/src/discretize.jl
+++ b/src/discretize.jl
@@ -626,7 +626,7 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem,
         end
 
         function full_loss_function(θ, allstd::Vector{Vector{Float64}})
-            stdpdes, stdbcs, stdextra = allstd
+            stdpdes, stdbcs, stdextra, stdpdesnew = allstd
             # the aggregation happens on cpu even if the losses are gpu, probably fine since it's only a few of them
             pde_loglikelihoods = [logpdf(Normal(0, stdpdes[i]), pde_loss_function(θ))
                                 for (i, pde_loss_function) in enumerate(pde_loss_functions)]
diff --git a/test/BPINN_Tests.jl b/test/BPINN_Tests.jl
index 6821a8d35e..2fe347b3b4 100644
--- a/test/BPINN_Tests.jl
+++ b/test/BPINN_Tests.jl
@@ -43,8 +43,8 @@ Random.seed!(100)
     # testing points
     t = time
     # Mean of last 500 sampled parameter's curves[Ensemble predictions]
-    θ = [vector_to_parameters(fhsamples[i], θinit) for i in 2000:2500]
-    luxar = [chainlux(t', θ[i], st)[1] for i in 1:500]
+    θ = [vector_to_parameters(fhsamples[i], θinit) for i in 2000:length(fhsamples)]
+    luxar = [chainlux(t', θ[i], st)[1] for i in eachindex(θ)]
     luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
     meanscurve = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
 
@@ -110,8 +110,8 @@ end
     # testing points
     t = time
     # Mean of last 500 sampled parameter's curves(flux and lux chains)[Ensemble predictions]
-    θ = [vector_to_parameters(fhsamples[i][1:(end - 1)], θinit) for i in 2000:2500]
-    luxar = [chainlux1(t', θ[i], st)[1] for i in 1:500]
+    θ = [vector_to_parameters(fhsamples[i][1:(end - 1)], θinit) for i in 2000:length(fhsamples)]
+    luxar = [chainlux1(t', θ[i], st)[1] for i in eachindex(θ)]
     luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
     meanscurve = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
 
@@ -119,7 +119,7 @@ end
     @test mean(abs.(physsol1 .- meanscurve)) < 0.15
 
     # ESTIMATED ODE PARAMETERS (NN1 AND NN2)
-    @test abs(p - mean([fhsamples[i][23] for i in 2000:2500])) < abs(0.35 * p)
+    @test abs(p - mean([fhsamples[i][23] for i in 2000:length(fhsamples)])) < abs(0.35 * p)
 
     #-------------------------- solve() call  
     @test mean(abs.(physsol1_1 .- sol2lux.ensemblesol[1])) < 8e-2
@@ -190,13 +190,13 @@ end
     t = sol.t
     #------------------------------ ahmc_bayesian_pinn_ode() call
     # Mean of last 500 sampled parameter's curves(lux chains)[Ensemble predictions]
-    θ = [vector_to_parameters(fhsampleslux12[i], θinit) for i in 1000:1500]
-    luxar = [chainlux12(t', θ[i], st)[1] for i in 1:500]
+    θ = [vector_to_parameters(fhsampleslux12[i], θinit) for i in 1000:length(fhsampleslux12)]
+    luxar = [chainlux12(t', θ[i], st)[1] for i in eachindex(θ)]
     luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
     meanscurve2_1 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
 
-    θ = [vector_to_parameters(fhsampleslux22[i][1:(end - 1)], θinit) for i in 1000:1500]
-    luxar = [chainlux12(t', θ[i], st)[1] for i in 1:500]
+    θ = [vector_to_parameters(fhsampleslux22[i][1:(end - 1)], θinit) for i in 1000:length(fhsampleslux22)]
+    luxar = [chainlux12(t', θ[i], st)[1] for i in eachindex(θ)]
     luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
     meanscurve2_2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
 
@@ -206,7 +206,7 @@ end
     @test mean(abs.(physsol1 .- meanscurve2_2)) < 5e-2
 
     # estimated parameters(lux chain)
-    param1 = mean(i[62] for i in fhsampleslux22[1000:1500])
+    param1 = mean(i[62] for i in fhsampleslux22[1000:length(fhsampleslux22)])
     @test abs(param1 - p) < abs(0.3 * p)
 
     #-------------------------- solve() call 

From 78cadf1ac9f5d91a0c4378ad500d43ccb089b5b8 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Thu, 22 Feb 2024 00:32:26 +0530
Subject: [PATCH 025/107] changes from reviews

---
 src/PDE_BPINN.jl              |  12 +-
 src/training_strategies.jl    |  18 +-
 test/BPINN_PDEinvsol_tests.jl | 303 ++++++++++++++++++++++++++++------
 3 files changed, 258 insertions(+), 75 deletions(-)

diff --git a/src/PDE_BPINN.jl b/src/PDE_BPINN.jl
index 69eaa2b733..500510d64c 100644
--- a/src/PDE_BPINN.jl
+++ b/src/PDE_BPINN.jl
@@ -81,20 +81,15 @@ function get_lossy(pinnrep, dataset, Dict_differentials)
                for i in 1:size(dataset[1][:, 1])[1]]
 
     # for each point(eq_sub dictionary) substiute in all equations(eqs_new - masked equations)
-    b = []
-    for eq_sub in eq_subs
-        push!(b, [substitute(eq, eq_sub) for eq in eqs_new])
-    end
+    b = [[substitute(eq, eq_sub) for eq in eqs_new] for eq_sub in eq_subs]
+
     # now we have vector of equation vectors
 
     # reverse dict for re-substituting values of Differential(t)(u(t)) etc
     rev_Dict_differentials = Dict(value => key for (key, value) in Dict_differentials)
 
     # for each vector in vector of equation vectorbroadcast resubstituing OG mask values
-    c = []
-    for b_i in b
-        push!(c, substitute.(b_i, Ref(rev_Dict_differentials)))
-    end
+    c = [substitute.(b_i, Ref(rev_Dict_differentials)) for b_i in b]
 
     # get losses, zip each equation with args for each build_loss call per equation vector
     loss_functions = [[build_loss_function(pinnrep, eq, pde_indvar)
@@ -506,7 +501,6 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
 
     # parallel sampling option
     if nchains != 1
-
         # Cache to store the chains
         bpinnsols = Vector{Any}(undef, nchains)
 
diff --git a/src/training_strategies.jl b/src/training_strategies.jl
index ffec6fe2d2..1bf767beca 100644
--- a/src/training_strategies.jl
+++ b/src/training_strategies.jl
@@ -25,20 +25,10 @@ function get_dataset_train_points(eqs, train_sets, pinnrep)
     eq_args = NeuralPDE.get_argument(eqs, dict_indvars, dict_depvars)
     # [[:t]]
 
-    points = []
-    for eq_arg in eq_args
-        a = []
-        # for each (depvar,[indvar1..]) if indvari==indvar (eq_arg)
-        for i in eachindex(symbols_input)
-            if symbols_input[i][2] == eq_arg
-                # include domain points of that depvar
-                # each loss equation take domain matrix [points..;points..]
-                push!(a, train_sets[i][:, 2:end]')
-            end
-        end
-        # vcat as new row for next equation
-        push!(points, vcat(a...))
-    end
+    points = [vcat([train_sets[i][:, 2:end]'
+                    for i in eachindex(symbols_input) if symbols_input[i][2] == eq_arg]...)
+              for eq_arg in eq_args]
+
     return points
 end
 
diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index 32076a0e9e..1eb6784f6a 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -216,7 +216,7 @@ sol1 = ahmc_bayesian_pinn_pde(pde_system,
     priorsNNw = (0.0, 1.0),
     saveats = [1 / 50.0],
     param = [LogNormal(6.0, 0.5)],
-    Dict_differentials = Dict_differentials)
+    Dict_differentials = Dict_differentials, progress = true)
 
 param = 2 * π
 ts = vec(sol1.timepoints[1])
@@ -227,69 +227,268 @@ u_predict = pmean(sol1.ensemblesol[1])
 @test mean(u_predict .- u_real) < 0.1
 @test sol1.estimated_de_params[1]≈param atol=param * 0.3
 
-println("Example 4: Lorenz System with New parameter estimation")
-@parameters t, σ_
-@variables x(..), y(..), z(..)
+println("Example 3: Lotka Volterra with New parameter estimation")
+@parameters t α β γ δ
+@variables x(..) y(..)
+
 Dt = Differential(t)
-eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
-    Dt(y(t)) ~ x(t) * (28.0 - z(t)) - y(t),
-    Dt(z(t)) ~ x(t) * y(t) - 8 / 3 * z(t)]
-
-bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
-domains = [t ∈ Interval(0.0, 1.0)]
-
-input_ = length(domains)
-n = 7
-chain = [
-    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-        Lux.Dense(n, 1)),
-    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-        Lux.Dense(n, 1)),
-    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-        Lux.Dense(n, 1)),
+eqs = [Dt(x(t)) ~ α * x(t) - β * x(t) * y(t), Dt(y(t)) ~ -γ * y(t) + δ * x(t) * y(t)]
+bcs = [x(0) ~ 1.0, y(0) ~ 1.0]
+domains = [t ∈ Interval(0.0, 4.0)]
+
+# Define the parameters' values
+# params = [α => 1.0, β => 0.5, γ => 0.5, δ => 1.0]
+# p = [1.5, 1.0, 3.0, 1.0]
+
+chainl = [
+    Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),
+        Lux.Dense(6, 1)),
+    Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),
+        Lux.Dense(6, 1)),
 ]
 
-#Generate Data
-function lorenz!(du, u, p, t)
-    du[1] = 10.0 * (u[2] - u[1])
-    du[2] = u[1] * (28.0 - u[3]) - u[2]
-    du[3] = u[1] * u[2] - (8 / 3) * u[3]
+initl, st = Lux.setup(Random.default_rng(), chainl[1])
+initl1, st1 = Lux.setup(Random.default_rng(), chainl[2])
+
+@named pde_system = PDESystem(eqs,
+    bcs,
+    domains,
+    [t],
+    [x(t), y(t)],
+    [α, β, γ, δ],
+    defaults = Dict([α => 1.0, β => 0.5, γ => 0.5, δ => 1.0]))
+
+using NeuralPDE, Lux, Plots, OrdinaryDiffEq, Distributions, Random
+
+function lotka_volterra(u, p, t)
+    α, β, γ, δ = p
+    x, y = u
+    dx = (α - β * y) * x
+    dy = (δ * x - γ) * y
+    return [dx, dy]
 end
 
-u0 = [1.0; 0.0; 0.0]
-tspan = (0.0, 1.0)
-prob = ODEProblem(lorenz!, u0, tspan)
-sol = solve(prob, Tsit5(), dt = 0.01, saveat = 0.05)
-ts = sol.t
-us = hcat(sol.u...)
-us = us .+ ((0.05 .* randn(size(us))) .* us)
-ts_ = hcat(sol(ts).t...)[1, :]
-dataset = [hcat(us[i, :], ts_) for i in 1:3]
-
-discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true,
+# initial-value problem.
+u0 = [1.0, 1.0]
+p = [1.5, 1.0, 3.0, 1.0]
+tspan = (0.0, 4.0)
+prob = ODEProblem(lotka_volterra, u0, tspan, p)
+
+# Solve using OrdinaryDiffEq.jl solver
+dt = 0.05
+solution = solve(prob, Tsit5(); saveat = dt)
+
+# Extract solution
+time = solution.t
+u = hcat(solution.u...)
+# plot(time, u[1, :])
+# plot!(time, u[2, :])
+# Construct dataset
+dataset = [hcat(u[i, :], time) for i in 1:2]
+
+discretization = BayesianPINN(chainl, GridTraining(0.01), param_estim = true,
     dataset = [dataset, nothing])
 
-@named pde_system = PDESystem(eqs, bcs, domains,
-    [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]]))
-
 # creating dictionary for masking equations
 eqs = pde_system.eqs
 Dict_differentials = Dict()
 exps = toexpr.(eqs)
 nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
 
+sol = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 500,
+    bcstd = [0.05, 0.05],
+    phystd = [0.005, 0.005], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 10.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(1.0, 2),
+        Normal(1.0, 2),
+        Normal(1.0, 2),
+        Normal(1.0, 2),
+    ], progress = true)
+
+# plot!(sol.timepoints[1]', sol.ensemblesol[1])
+# plot!(sol.timepoints[2]', sol.ensemblesol[2])
+
 sol1 = ahmc_bayesian_pinn_pde(pde_system,
     discretization;
-    draw_samples = 50,
-    bcstd = [0.3, 0.3, 0.3],
-    phystd = [0.1, 0.1, 0.1],
-    l2std = [1, 1, 1],
-    priorsNNw = (0.0, 1.0),
-    saveats = [0.01],
-    param = [Normal(12.0, 2)],
-    Dict_differentials = Dict_differentials)
-
-idealp = 10.0
-p_ = sol1.estimated_de_params[1]
-@test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
-# @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
+    draw_samples = 500,
+    bcstd = [0.05, 0.05],
+    phystd = [0.005, 0.005], l2std = [0.1, 0.1],
+    phystdnew = [0.1, 0.1],
+    #  Kernel = AdvancedHMC.NUTS(0.8),
+    priorsNNw = (0.0, 10.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1.0, 2),
+        Normal(1.0, 2),
+        Normal(1.0, 2),
+        Normal(1.0, 2),
+    ],
+    Dict_differentials = Dict_differentials, progress = true)
+
+# plot!(sol1.timepoints[1]', sol1.ensemblesol[1])
+# plot!(sol1.timepoints[2]', sol1.ensemblesol[2])
+
+param = 2 * π
+ts = vec(sol1.timepoints[1])
+u_real = [analytic_sol_func1(0.0, t) for t in ts]
+u_predict = pmean(sol1.ensemblesol[1])
+
+@test u_predict≈u_real atol=1.5
+@test mean(u_predict .- u_real) < 0.1
+@test sol1.estimated_de_params[1]≈param atol=param * 0.3
+
+# points1 = []
+# for eq_arg in eq_args
+#     a = []
+#     # for each (depvar,[indvar1..]) if indvari==indvar (eq_arg)
+#     for i in eachindex(symbols_input)
+#         if symbols_input[i][2] == eq_arg
+#             # include domain points of that depvar
+#             # each loss equation take domain matrix [points..;points..]
+#             push!(a, train_sets[i][:, 2:end]')
+#         end
+#     end
+#     # vcat as new row for next equation
+#     push!(points1, vcat(a...))
+# end
+# println(points1 == points)
+
+using NeuralPDE, Flux, Lux, ModelingToolkit, LinearAlgebra, AdvancedHMC
+import ModelingToolkit: Interval, infimum, supremum, Distributions
+using Plots, MonteCarloMeasurements
+
+@parameters x, t, α
+@variables u(..)
+Dt = Differential(t)
+Dx = Differential(x)
+Dx2 = Differential(x)^2
+Dx3 = Differential(x)^3
+Dx4 = Differential(x)^4
+
+# α = 1
+β = 4
+γ = 1
+eq = Dt(u(x, t)) + u(x, t) * Dx(u(x, t)) + α * Dx2(u(x, t)) + β * Dx3(u(x, t)) + γ * Dx4(u(x, t)) ~ 0
+
+u_analytic(x, t; z = -x / 2 + t) = 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
+du(x, t; z = -x / 2 + t) = 15 / 2 * (tanh(z) + 1) * (3 * tanh(z) - 1) * sech(z)^2
+
+bcs = [u(x, 0) ~ u_analytic(x, 0),
+    u(-10, t) ~ u_analytic(-10, t),
+    u(10, t) ~ u_analytic(10, t),
+    Dx(u(-10, t)) ~ du(-10, t),
+    Dx(u(10, t)) ~ du(10, t)]
+
+# Space and time domains
+domains = [x ∈ Interval(-10.0, 10.0),
+    t ∈ Interval(0.0, 1.0)]
+
+# Discretization
+dx = 0.4;
+dt = 0.2;
+
+# Function to compute analytical solution at a specific point (x, t)
+function u_analytic_point(x, t)
+    z = -x / 2 + t
+    return 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
+end
+
+# Function to generate the dataset matrix
+function generate_dataset_matrix(domains, dx, dt)
+    x_values = -10:dx:10
+    t_values = 0.0:dt:1.0
+
+    dataset = []
+
+    for t in t_values
+        for x in x_values
+            u_value = u_analytic_point(x, t)
+            push!(dataset, [u_value, x, t])
+        end
+    end
+
+    return vcat([data' for data in dataset]...)
+end
+
+datasetpde = [generate_dataset_matrix(domains, dx, dt)]
+
+# noise to dataset
+noisydataset = deepcopy(datasetpde)
+noisydataset[1][:, 1] = noisydataset[1][:, 1] .+
+                        randn(size(noisydataset[1][:, 1])) .* 5 / 100 .*
+                        noisydataset[1][:, 1]
+
+# plot(datasetpde[1][:, 2], datasetpde[1][:, 1], title = "Dataset from Analytical Solution")
+# plot!(noisydataset[1][:, 2], noisydataset[1][:, 1])
+
+# Neural network
+chain = Lux.Chain(Lux.Dense(2, 8, Lux.tanh),
+    Lux.Dense(8, 8, Lux.tanh),
+    Lux.Dense(8, 1))
+
+discretization = NeuralPDE.BayesianPINN([chain],
+    GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset, nothing])
+
+@named pde_system = PDESystem(eq,
+    bcs,
+    domains,
+    [x, t],
+    [u(x, t)],
+    [α],
+    defaults = Dict([α => 0.5]))
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 100,
+    bcstd = [0.2, 0.2, 0.2, 0.2, 0.2],
+    phystd = [1.0], l2std = [0.05], param = [Distributions.LogNormal(0.5, 2)],
+    priorsNNw = (0.0, 10.0),
+    saveats = [1 / 100.0, 1 / 100.0], progress = true)
+
+eqs = pde_system.eqs
+Dict_differentials = Dict()
+exps = toexpr.(eqs)
+nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+sol2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 100,
+    bcstd = [0.2, 0.2, 0.2, 0.2, 0.2],
+    phystd = [1.0], phystdnew = [0.05], l2std = [0.05],
+    param = [Distributions.LogNormal(0.5, 2)],
+    priorsNNw = (0.0, 10.0),
+    saveats = [1 / 100.0, 1 / 100.0], Dict_differentials = Dict_differentials,
+    progress = true)
+
+phi = discretization.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain) for (d, dx) in zip(domains, [dx / 10, dt])]
+u_predict = [[first(pmean(phi([x, t], sol1.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+# p1 = plot(xs, u_predict, title = "predict")
+# p2 = plot(xs, u_real, title = "analytic")
+# p3 = plot(xs, diff_u, title = "error")
+# plot(p1, p2, p3)
+
+phi = discretization.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain) for (d, dx) in zip(domains, [dx / 10, dt])]
+u_predict = [[first(pmean(phi([x, t], sol2.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol2.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+# p1 = plot(xs, u_predict, title = "predict")
+# p2 = plot(xs, u_real, title = "analytic")
+# p3 = plot(xs, diff_u, title = "error")
+# plot(p1, p2, p3)

From 49dd7cbf4952d110e8d2ada4aa53b9f86f8ca2bb Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Tue, 27 Feb 2024 02:44:21 +0530
Subject: [PATCH 026/107] refactor code, Corrected PDE_BPINN Logphys calc.

---
 src/PDE_BPINN.jl              |  26 ++-
 src/advancedHMC_MCMC.jl       |   2 +-
 src/discretize.jl             | 108 ++++++----
 src/training_strategies.jl    |  52 ++++-
 test/BPINN_PDEinvsol_tests.jl | 386 +++++++++++++++++++++-------------
 5 files changed, 367 insertions(+), 207 deletions(-)

diff --git a/src/PDE_BPINN.jl b/src/PDE_BPINN.jl
index 500510d64c..4375621782 100644
--- a/src/PDE_BPINN.jl
+++ b/src/PDE_BPINN.jl
@@ -3,8 +3,9 @@ mutable struct PDELogTargetDensity{
     D <: Union{Nothing, Vector{<:Matrix{<:Real}}},
     P <: Vector{<:Distribution},
     I,
-    F, FF,
-    PH,
+    F,
+    FF,
+    PH
 }
     dim::Int64
     strategy::ST
@@ -26,8 +27,9 @@ mutable struct PDELogTargetDensity{
             typeof(dataset),
             typeof(priors),
             typeof(init_params),
-            typeof(full_loglikelihood), typeof(L2_loss2),
-            typeof(Φ),
+            typeof(full_loglikelihood),
+            typeof(L2_loss2),
+            typeof(Φ)
         }(dim,
             strategy,
             dataset,
@@ -36,7 +38,8 @@ mutable struct PDELogTargetDensity{
             names,
             extraparams,
             init_params,
-            full_loglikelihood, L2_loss2,
+            full_loglikelihood,
+            L2_loss2,
             Φ)
     end
     function PDELogTargetDensity(dim, strategy, dataset,
@@ -48,8 +51,9 @@ mutable struct PDELogTargetDensity{
             typeof(dataset),
             typeof(priors),
             typeof(init_params),
-            typeof(full_loglikelihood), typeof(L2_loss2),
-            typeof(Φ),
+            typeof(full_loglikelihood),
+            typeof(L2_loss2),
+            typeof(Φ)
         }(dim,
             strategy,
             dataset,
@@ -58,7 +62,8 @@ mutable struct PDELogTargetDensity{
             names,
             extraparams,
             init_params,
-            full_loglikelihood, L2_loss2,
+            full_loglikelihood,
+            L2_loss2,
             Φ)
     end
 end
@@ -374,7 +379,8 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
 
         # consider all dataset domain points and for each row new set of equation loss function
         # this is a vector of tuple{vector,nothing}
-        pde_loss_functions = [merge_strategy_with_loglikelihood_function(pinnrep::PINNRepresentation,
+        pde_loss_functions = [merge_strategy_with_loglikelihood_function(
+            pinnrep::PINNRepresentation,
             GridTraining(0.1),
             yuh1[i],
             nothing;
@@ -453,7 +459,7 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
     # add init_params for NN params
     priors = [
         MvNormal(priorsNNw[1] * ones(nparameters),
-            LinearAlgebra.Diagonal(abs2.(priorsNNw[2] .* ones(nparameters)))),
+        LinearAlgebra.Diagonal(abs2.(priorsNNw[2] .* ones(nparameters))))
     ]
 
     # append Ode params to all paramvector - initial_θ
diff --git a/src/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl
index ea50eabcf6..9064ddd9fa 100644
--- a/src/advancedHMC_MCMC.jl
+++ b/src/advancedHMC_MCMC.jl
@@ -261,7 +261,7 @@ function innerdiff(Tar::LogTargetDensity, f, autodiff::Bool, t::AbstractVector,
 
     vals = nnsol .- physsol
 
-    # N dimensional vector if N outputs for NN(each row has logpdf of i[i] where u is vector of dependant variables)
+    # N dimensional vector if N outputs for NN(each row has logpdf of u[i] where u is vector of dependant variables)
     return [logpdf(MvNormal(vals[i, :],
             LinearAlgebra.Diagonal(abs2.(Tar.phystd[i] .*
                                          ones(length(vals[i, :]))))),
diff --git a/src/discretize.jl b/src/discretize.jl
index 0740544c09..dfb9b5174c 100644
--- a/src/discretize.jl
+++ b/src/discretize.jl
@@ -504,29 +504,30 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem,
                                                                                  bc_indvars,
                                                                                  bc_integration_vars)]
 
-    pde_loss_functions, bc_loss_functions = merge_strategy_with_loss_function(pinnrep,
-                                                                              strategy,
-                                                                              datafree_pde_loss_functions,
-                                                                              datafree_bc_loss_functions)
-    # setup for all adaptive losses
-    num_pde_losses = length(pde_loss_functions)
-    num_bc_losses = length(bc_loss_functions)
-    # assume one single additional loss function if there is one. this means that the user needs to lump all their functions into a single one,
-    num_additional_loss = additional_loss isa Nothing ? 0 : 1
-
-    adaloss_T = eltype(adaloss.pde_loss_weights)
-
-    # this will error if the user has provided a number of initial weights that is more than 1 and doesn't match the number of loss functions
-    adaloss.pde_loss_weights = ones(adaloss_T, num_pde_losses) .* adaloss.pde_loss_weights
-    adaloss.bc_loss_weights = ones(adaloss_T, num_bc_losses) .* adaloss.bc_loss_weights
-    adaloss.additional_loss_weights = ones(adaloss_T, num_additional_loss) .*
-                                      adaloss.additional_loss_weights
-
-    reweight_losses_func = generate_adaptive_loss_function(pinnrep, adaloss,
-                                                           pde_loss_functions,
-                                                           bc_loss_functions)
-
     function get_likelihood_estimate_function(discretization::PhysicsInformedNN)
+        pde_loss_functions, bc_loss_functions = merge_strategy_with_loss_function(pinnrep,
+            strategy,
+            datafree_pde_loss_functions,
+            datafree_bc_loss_functions)
+        # setup for all adaptive losses
+        num_pde_losses = length(pde_loss_functions)
+        num_bc_losses = length(bc_loss_functions)
+        # assume one single additional loss function if there is one. this means that the user needs to lump all their functions into a single one,
+        num_additional_loss = additional_loss isa Nothing ? 0 : 1
+
+        adaloss_T = eltype(adaloss.pde_loss_weights)
+
+        # this will error if the user has provided a number of initial weights that is more than 1 and doesn't match the number of loss functions
+        adaloss.pde_loss_weights = ones(adaloss_T, num_pde_losses) .*
+                                   adaloss.pde_loss_weights
+        adaloss.bc_loss_weights = ones(adaloss_T, num_bc_losses) .* adaloss.bc_loss_weights
+        adaloss.additional_loss_weights = ones(adaloss_T, num_additional_loss) .*
+                                          adaloss.additional_loss_weights
+
+        reweight_losses_func = generate_adaptive_loss_function(pinnrep, adaloss,
+            pde_loss_functions,
+            bc_loss_functions)
+
         function full_loss_function(θ, p)
             # the aggregation happens on cpu even if the losses are gpu, probably fine since it's only a few of them
             pde_losses = [pde_loss_function(θ) for pde_loss_function in pde_loss_functions]
@@ -603,46 +604,66 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem,
             return full_weighted_loss
         end
 
-        return full_loss_function
+        return bc_loss_functions, pde_loss_functions, full_loss_function
     end
 
     function get_likelihood_estimate_function(discretization::BayesianPINN)
+        # Because seperate reweighting code section needed and loglikelihood is pointwise independant
+        pde_loss_functions, bc_loss_functions = merge_strategy_with_loglikelihood_function(
+            pinnrep,
+            strategy,
+            datafree_pde_loss_functions,
+            datafree_bc_loss_functions)
+
+        # setup for all adaptive losses
+        num_pde_losses = length(pde_loss_functions)
+        num_bc_losses = length(bc_loss_functions)
+        # assume one single additional loss function if there is one. this means that the user needs to lump all their functions into a single one,
+        num_additional_loss = additional_loss isa Nothing ? 0 : 1
+
+        adaloss_T = eltype(adaloss.pde_loss_weights)
+
+        # this will error if the user has provided a number of initial weights that is more than 1 and doesn't match the number of loss functions
+        adaloss.pde_loss_weights = ones(adaloss_T, num_pde_losses) .*
+                                   adaloss.pde_loss_weights
+        adaloss.bc_loss_weights = ones(adaloss_T, num_bc_losses) .* adaloss.bc_loss_weights
+        adaloss.additional_loss_weights = ones(adaloss_T, num_additional_loss) .*
+                                          adaloss.additional_loss_weights
+
+        reweight_losses_func = generate_adaptive_loss_function(pinnrep, adaloss,
+            pde_loss_functions,
+            bc_loss_functions)
+
         dataset_pde, dataset_bc = discretization.dataset
         dataset_pde = dataset_pde isa Nothing ? dataset_pde : get_dataset_train_points(eqs, dataset_pde, pinnrep)
         dataset_bc = dataset_bc isa Nothing ? dataset_bc : get_dataset_train_points(eqs, dataset_bc, pinnrep)
 
         # required as Physics loss also needed on the discrete dataset domain points
         # data points are discrete and so by default GridTraining loss applies
-        # passing placeholder dx with GridTraining, it uses data points irl
-        datapde_loss_functions, databc_loss_functions = if (!(dataset_bc isa Nothing)||!(dataset_pde isa Nothing))
-            merge_strategy_with_loglikelihood_function(pinnrep,
+        # passing placeholder dx with GridTraining, it uses dataset points irl
+        datapde_loss_functions, databc_loss_functions = merge_strategy_with_loglikelihood_function(
+                pinnrep,
                 GridTraining(0.1),
                 datafree_pde_loss_functions,
                 datafree_bc_loss_functions,
                 train_sets_pde = dataset_pde,
                 train_sets_bc = dataset_bc)
-        else
-            (nothing, nothing)
-        end
 
         function full_loss_function(θ, allstd::Vector{Vector{Float64}})
             stdpdes, stdbcs, stdextra, stdpdesnew = allstd
             # the aggregation happens on cpu even if the losses are gpu, probably fine since it's only a few of them
-            pde_loglikelihoods = [logpdf(Normal(0, stdpdes[i]), pde_loss_function(θ))
-                                for (i, pde_loss_function) in enumerate(pde_loss_functions)]
-
-            bc_loglikelihoods = [logpdf(Normal(0, stdbcs[j]), bc_loss_function(θ))
-                                for (j, bc_loss_function) in enumerate(bc_loss_functions)]
-
+            # pde_loglikelihoods = sum([logpdf(MvNormal(pde_loss_function(θ)[1, :], LinearAlgebra.Diagonal(abs2.(stdpdes[i] .* ones(pde_loss_length[i])))), zeros(pde_loss_length[i])) for (i, pde_loss_function) in enumerate(pde_loss_functions)])
+            pde_loglikelihoods = sum([pde_loss_function(θ, stdpdes[i]) for (i, pde_loss_function) in enumerate(pde_loss_functions)])
+            # bc_loglikelihoods = sum([logpdf(MvNormal(bc_loss_function(θ)[1, :], LinearAlgebra.Diagonal(abs2.(stdbcs[j] .* ones(bc_loss_length[j])))), zeros(bc_loss_length[j])) for (j, bc_loss_function) in enumerate(bc_loss_functions)])
+            bc_loglikelihoods = sum([bc_loss_function(θ, stdbcs[j]) for (j, bc_loss_function) in enumerate(bc_loss_functions)])
             if !(datapde_loss_functions isa Nothing)
-                pde_loglikelihoods += [logpdf(Normal(0, stdpdes[j]), pde_loss_function(θ))
-                                    for (j, pde_loss_function) in enumerate(datapde_loss_functions)]
-
+                pde_loglikelihoods += sum([datapde_loss_function(θ, stdpdes[i]) for (i, datapde_loss_function) in enumerate(datapde_loss_functions)])
+                # sum([logpdf(MvNormal(datapde_loss_function(θ)[1, :], LinearAlgebra.Diagonal(abs2.(stdpdes[i] .* ones(datapde_length[i])))), zeros(datapde_length[i])) for (i, datapde_loss_function) in enumerate(datapde_loss_functions)])
             end
 
             if !(databc_loss_functions isa Nothing)
-                bc_loglikelihoods += [logpdf(Normal(0, stdbcs[j]), bc_loss_function(θ)) 
-                                    for (j, bc_loss_function) in enumerate(databc_loss_functions)]
+                bc_loglikelihoods += sum([databc_loss_function(θ, stdbcs[j]) for (j, databc_loss_function) in enumerate(databc_loss_functions)])
+                # sum([logpdf(MvNormal(databc_loss_function(θ)[1, :], LinearAlgebra.Diagonal(abs2.(stdbcs[j] .* ones(databc_length[j])))), zeros(databc_length[j])) for (j, databc_loss_function) in enumerate(databc_loss_functions)])
             end
 
             # this is kind of a hack, and means that whenever the outer function is evaluated the increment goes up, even if it's not being optimized
@@ -688,12 +709,13 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem,
             return full_weighted_loglikelihood
         end
 
-        return full_loss_function
+        return bc_loss_functions, pde_loss_functions, full_loss_function
     end
 
-    full_loss_function = get_likelihood_estimate_function(discretization)
+    bc_loss_functions, pde_loss_functions, full_loss_function = get_likelihood_estimate_function(discretization)
+
     pinnrep.loss_functions = PINNLossFunctions(bc_loss_functions, pde_loss_functions,
-                                                full_loss_function, additional_loss, 
+                                                full_loss_function, additional_loss,
                                                 datafree_pde_loss_functions,
                                                 datafree_bc_loss_functions)
 
diff --git a/src/training_strategies.jl b/src/training_strategies.jl
index 1bf767beca..9fdff04f06 100644
--- a/src/training_strategies.jl
+++ b/src/training_strategies.jl
@@ -14,6 +14,7 @@ struct GridTraining{T} <: AbstractTrainingStrategy
     dx::T
 end
 
+# dataset must have depvar values for same values of indvars
 function get_dataset_train_points(eqs, train_sets, pinnrep)
     dict_depvar_input = pinnrep.dict_depvar_input
     depvars = pinnrep.depvars
@@ -23,11 +24,28 @@ function get_dataset_train_points(eqs, train_sets, pinnrep)
     symbols_input = [(i, dict_depvar_input[i]) for i in depvars]
     # [(:u, [:t])]
     eq_args = NeuralPDE.get_argument(eqs, dict_indvars, dict_depvars)
-    # [[:t]]
-
-    points = [vcat([train_sets[i][:, 2:end]'
-                    for i in eachindex(symbols_input) if symbols_input[i][2] == eq_arg]...)
-              for eq_arg in eq_args]
+    # equation wise indvar presence ~ [[:t]]
+    # in each equation atleast one depvars must be a function of all indvars(to cover heterogenous/not case)
+
+    # train_sets follows order of depvars
+    # take dataset indvar values if for equations depvar's indvar matches input symbol indvar
+    # points =  [vcat([train_sets[i][:, 2:end]'
+    #                 for i in eachindex(symbols_input) if symbols_input[i][2] == eq_arg]...)
+    #           for eq_arg in eq_args]
+
+    points = []
+    for eq_arg in eq_args
+        eq_points = []
+        for i in eachindex(symbols_input)
+            if symbols_input[i][2] == eq_arg
+                push!(eq_points, train_sets[i][:, 2:end]')
+                # Terminate to avoid repetitive ind var points inclusion
+                break
+            end
+        end
+        # Concatenate points for this equation argument
+        push!(points, vcat(eq_points...))
+    end
 
     return points
 end
@@ -36,17 +54,25 @@ end
 function merge_strategy_with_loglikelihood_function(pinnrep::PINNRepresentation,
         strategy::GridTraining,
         datafree_pde_loss_function,
-        datafree_bc_loss_function; train_sets_pde = nothing, train_sets_bc=nothing)
+        datafree_bc_loss_function; train_sets_pde = nothing, train_sets_bc = nothing)
     @unpack domains, eqs, bcs, dict_indvars, dict_depvars, flat_init_params = pinnrep
-
+    dx = strategy.dx
     eltypeθ = eltype(pinnrep.flat_init_params)
 
+    # physics loss merge_strategy_with_loglikelihood_function call case
+    if ((train_sets_bc isa Nothing)&&(train_sets_pde isa Nothing))
+        train_sets_pde, train_sets_bc = generate_training_sets(
+            domains, dx, eqs, bcs, eltypeθ,
+            dict_indvars, dict_depvars)
+    end
+
     # is vec as later each _set in pde_train_sets are columns as points transformed to vector of points (pde_train_sets must be rowwise)
     pde_loss_functions = if !(train_sets_pde isa Nothing)
+        # dataset and domain pde losses case
         pde_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)),
             train_sets_pde)
 
-        [get_loss_function(_loss, _set, eltypeθ, strategy)
+        [get_points_loss_functions(_loss, _set, eltypeθ, strategy)
                               for (_loss, _set) in zip(datafree_pde_loss_function,
             pde_train_sets)]
     else
@@ -54,10 +80,11 @@ function merge_strategy_with_loglikelihood_function(pinnrep::PINNRepresentation,
     end
     
     bc_loss_functions = if !(train_sets_bc isa Nothing)
+        # dataset and domain bc losses case
         bcs_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)),
             train_sets_bc)
 
-        [get_loss_function(_loss, _set, eltypeθ, strategy)
+        [get_points_loss_functions(_loss, _set, eltypeθ, strategy)
                          for (_loss, _set) in zip(datafree_bc_loss_function, bcs_train_sets)]
     else
         nothing
@@ -66,6 +93,13 @@ function merge_strategy_with_loglikelihood_function(pinnrep::PINNRepresentation,
     pde_loss_functions, bc_loss_functions
 end
 
+function get_points_loss_functions(loss_function, train_set, eltypeθ, strategy::GridTraining;
+        τ = nothing)
+    function loss(θ, std)
+        logpdf(MvNormal(loss_function(train_set, θ)[1, :], LinearAlgebra.Diagonal(abs2.(std .* ones(length(train_set))))), zeros(length(train_set)))
+    end
+end
+
 function merge_strategy_with_loss_function(pinnrep::PINNRepresentation,
                                            strategy::GridTraining,
                                            datafree_pde_loss_function,
diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index 1eb6784f6a..250f98810b 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -112,7 +112,7 @@ end
         Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
             Lux.Dense(n, 1)),
         Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-            Lux.Dense(n, 1)),
+            Lux.Dense(n, 1))
     ]
 
     #Generate Data
@@ -216,7 +216,7 @@ sol1 = ahmc_bayesian_pinn_pde(pde_system,
     priorsNNw = (0.0, 1.0),
     saveats = [1 / 50.0],
     param = [LogNormal(6.0, 0.5)],
-    Dict_differentials = Dict_differentials, progress = true)
+    Dict_differentials = Dict_differentials)
 
 param = 2 * π
 ts = vec(sol1.timepoints[1])
@@ -234,7 +234,7 @@ println("Example 3: Lotka Volterra with New parameter estimation")
 Dt = Differential(t)
 eqs = [Dt(x(t)) ~ α * x(t) - β * x(t) * y(t), Dt(y(t)) ~ -γ * y(t) + δ * x(t) * y(t)]
 bcs = [x(0) ~ 1.0, y(0) ~ 1.0]
-domains = [t ∈ Interval(0.0, 4.0)]
+domains = [t ∈ Interval(0.0, 6.0)]
 
 # Define the parameters' values
 # params = [α => 1.0, β => 0.5, γ => 0.5, δ => 1.0]
@@ -244,7 +244,7 @@ chainl = [
     Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),
         Lux.Dense(6, 1)),
     Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),
-        Lux.Dense(6, 1)),
+        Lux.Dense(6, 1))
 ]
 
 initl, st = Lux.setup(Random.default_rng(), chainl[1])
@@ -256,9 +256,9 @@ initl1, st1 = Lux.setup(Random.default_rng(), chainl[2])
     [t],
     [x(t), y(t)],
     [α, β, γ, δ],
-    defaults = Dict([α => 1.0, β => 0.5, γ => 0.5, δ => 1.0]))
+    defaults = Dict([α => 5, β => 0, γ => 0.5, δ => 2]))
 
-using NeuralPDE, Lux, Plots, OrdinaryDiffEq, Distributions, Random
+using NeuralPDE, Lux, OrdinaryDiffEq, Distributions, Random
 
 function lotka_volterra(u, p, t)
     α, β, γ, δ = p
@@ -281,10 +281,14 @@ solution = solve(prob, Tsit5(); saveat = dt)
 # Extract solution
 time = solution.t
 u = hcat(solution.u...)
+u1 = u .+ ((0.3 .* randn(size(u))) .* u)
+
+# using Plots, StatsPlots
+# plotly()
 # plot(time, u[1, :])
 # plot!(time, u[2, :])
 # Construct dataset
-dataset = [hcat(u[i, :], time) for i in 1:2]
+dataset = [hcat(u1[i, :], time) for i in 1:2]
 
 discretization = BayesianPINN(chainl, GridTraining(0.01), param_estim = true,
     dataset = [dataset, nothing])
@@ -295,6 +299,101 @@ Dict_differentials = Dict()
 exps = toexpr.(eqs)
 nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
 
+sol2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 500,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 3.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(0, 2),
+        Normal(0, 2),
+        Normal(0, 2),
+        Normal(0, 2)
+    ])
+
+# plot(sol2.timepoints[1]', sol2.ensemblesol[1])
+# plot!(sol2.timepoints[2]', sol2.ensemblesol[2])
+
+# plot(time, chainl[1](time', sol2.estimated_nn_params[1], st)[1])
+# plot!(time, chainl[2](time', sol2.estimated_nn_params[2], st)[1])
+
+# sol1 = ahmc_bayesian_pinn_pde(pde_system,
+#     discretization;
+#     draw_samples = 500,
+#     bcstd = [0.1, 0.1],
+#     phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+#     phystdnew = [1, 1],
+#     priorsNNw = (0.0, 3.0),
+#     saveats = [1 / 50.0],
+#     # Kernel = AdvancedHMC.NUTS(0.8),
+#     param = [
+#         Normal(1, 2),
+#         Normal(2, 2),
+#         Normal(2, 2),
+#         Normal(0, 2)
+#     ], Dict_differentials = Dict_differentials, progress = true)
+
+# # plot(time', chainl[1](time', sol1.estimated_nn_params[1], st)[1])
+# # plot!(time, chainl[2](time', sol1.estimated_nn_params[2], st)[1])
+
+# sol3 = ahmc_bayesian_pinn_pde(pde_system,
+#     discretization;
+#     draw_samples = 500,
+#     bcstd = [0.05, 0.05],
+#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
+#     phystdnew = [0.5, 0.5],
+#     #  Kernel = AdvancedHMC.NUTS(0.8),
+#     priorsNNw = (0.0, 10.0),
+#     saveats = [1 / 50.0],
+#     param = [
+#         Normal(0.0, 2),
+#         Normal(0.0, 2),
+#         Normal(0.0, 2),
+#         Normal(0.0, 2)
+#     ],
+#     Dict_differentials = Dict_differentials, progress = true)
+
+# sol = ahmc_bayesian_pinn_pde(pde_system,
+#     discretization;
+#     draw_samples = 500,
+#     bcstd = [0.05, 0.05],
+#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
+#     priorsNNw = (0.0, 10.0),
+#     saveats = [1 / 50.0],
+#     # Kernel = AdvancedHMC.NUTS(0.8),
+#     param = [
+#         Normal(1.0, 2),
+#         Normal(1.0, 2),
+#         Normal(1.0, 2),
+#         Normal(1.0, 2)
+#     ], progress = true)
+
+# plot!(sol.timepoints[1]', sol.ensemblesol[1])
+# plot!(sol.timepoints[2]', sol.ensemblesol[2])
+
+# sol1 = ahmc_bayesian_pinn_pde(pde_system,
+#     discretization;
+#     draw_samples = 500,
+#     bcstd = [0.05, 0.05],
+#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
+#     phystdnew = [0.5, 0.5],
+#     #  Kernel = AdvancedHMC.NUTS(0.8),
+#     priorsNNw = (0.0, 10.0),
+#     saveats = [1 / 50.0],
+#     param = [
+#         Normal(1.0, 2),
+#         Normal(1.0, 2),
+#         Normal(1.0, 2),
+#         Normal(1.0, 2)
+#     ],
+#     Dict_differentials = Dict_differentials, progress = true)
+
+# plot!(sol1.timepoints[1]', sol1.ensemblesol[1])
+# plot!(sol1.timepoints[2]', sol1.ensemblesol[2])
+
 sol = ahmc_bayesian_pinn_pde(pde_system,
     discretization;
     draw_samples = 500,
@@ -307,8 +406,8 @@ sol = ahmc_bayesian_pinn_pde(pde_system,
         Normal(1.0, 2),
         Normal(1.0, 2),
         Normal(1.0, 2),
-        Normal(1.0, 2),
-    ], progress = true)
+        Normal(1.0, 2)
+    ])
 
 # plot!(sol.timepoints[1]', sol.ensemblesol[1])
 # plot!(sol.timepoints[2]', sol.ensemblesol[2])
@@ -318,7 +417,7 @@ sol1 = ahmc_bayesian_pinn_pde(pde_system,
     draw_samples = 500,
     bcstd = [0.05, 0.05],
     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
-    phystdnew = [0.1, 0.1],
+    phystdnew = [0.5, 0.5],
     #  Kernel = AdvancedHMC.NUTS(0.8),
     priorsNNw = (0.0, 10.0),
     saveats = [1 / 50.0],
@@ -326,12 +425,9 @@ sol1 = ahmc_bayesian_pinn_pde(pde_system,
         Normal(1.0, 2),
         Normal(1.0, 2),
         Normal(1.0, 2),
-        Normal(1.0, 2),
+        Normal(1.0, 2)
     ],
-    Dict_differentials = Dict_differentials, progress = true)
-
-# plot!(sol1.timepoints[1]', sol1.ensemblesol[1])
-# plot!(sol1.timepoints[2]', sol1.ensemblesol[2])
+    Dict_differentials = Dict_differentials)
 
 param = 2 * π
 ts = vec(sol1.timepoints[1])
@@ -358,137 +454,139 @@ u_predict = pmean(sol1.ensemblesol[1])
 # end
 # println(points1 == points)
 
-using NeuralPDE, Flux, Lux, ModelingToolkit, LinearAlgebra, AdvancedHMC
-import ModelingToolkit: Interval, infimum, supremum, Distributions
-using Plots, MonteCarloMeasurements
-
-@parameters x, t, α
-@variables u(..)
-Dt = Differential(t)
-Dx = Differential(x)
-Dx2 = Differential(x)^2
-Dx3 = Differential(x)^3
-Dx4 = Differential(x)^4
-
-# α = 1
-β = 4
-γ = 1
-eq = Dt(u(x, t)) + u(x, t) * Dx(u(x, t)) + α * Dx2(u(x, t)) + β * Dx3(u(x, t)) + γ * Dx4(u(x, t)) ~ 0
-
-u_analytic(x, t; z = -x / 2 + t) = 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
-du(x, t; z = -x / 2 + t) = 15 / 2 * (tanh(z) + 1) * (3 * tanh(z) - 1) * sech(z)^2
-
-bcs = [u(x, 0) ~ u_analytic(x, 0),
-    u(-10, t) ~ u_analytic(-10, t),
-    u(10, t) ~ u_analytic(10, t),
-    Dx(u(-10, t)) ~ du(-10, t),
-    Dx(u(10, t)) ~ du(10, t)]
-
-# Space and time domains
-domains = [x ∈ Interval(-10.0, 10.0),
-    t ∈ Interval(0.0, 1.0)]
-
-# Discretization
-dx = 0.4;
-dt = 0.2;
-
-# Function to compute analytical solution at a specific point (x, t)
-function u_analytic_point(x, t)
-    z = -x / 2 + t
-    return 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
-end
-
-# Function to generate the dataset matrix
-function generate_dataset_matrix(domains, dx, dt)
-    x_values = -10:dx:10
-    t_values = 0.0:dt:1.0
-
-    dataset = []
-
-    for t in t_values
-        for x in x_values
-            u_value = u_analytic_point(x, t)
-            push!(dataset, [u_value, x, t])
-        end
-    end
-
-    return vcat([data' for data in dataset]...)
-end
-
-datasetpde = [generate_dataset_matrix(domains, dx, dt)]
-
-# noise to dataset
-noisydataset = deepcopy(datasetpde)
-noisydataset[1][:, 1] = noisydataset[1][:, 1] .+
-                        randn(size(noisydataset[1][:, 1])) .* 5 / 100 .*
-                        noisydataset[1][:, 1]
-
-# plot(datasetpde[1][:, 2], datasetpde[1][:, 1], title = "Dataset from Analytical Solution")
-# plot!(noisydataset[1][:, 2], noisydataset[1][:, 1])
-
-# Neural network
-chain = Lux.Chain(Lux.Dense(2, 8, Lux.tanh),
-    Lux.Dense(8, 8, Lux.tanh),
-    Lux.Dense(8, 1))
+# using NeuralPDE, Flux, Lux, ModelingToolkit, LinearAlgebra, AdvancedHMC
+# import ModelingToolkit: Interval, infimum, supremum, Distributions
+# using Plots, MonteCarloMeasurements
+
+# @parameters x, t, α
+# @variables u(..)
+# Dt = Differential(t)
+# Dx = Differential(x)
+# Dx2 = Differential(x)^2
+# Dx3 = Differential(x)^3
+# Dx4 = Differential(x)^4
+
+# # α = 1
+# β = 4
+# γ = 1
+# eq = Dt(u(x, t)) + u(x, t) * Dx(u(x, t)) + α * Dx2(u(x, t)) + β * Dx3(u(x, t)) + γ * Dx4(u(x, t)) ~ 0
+
+# u_analytic(x, t; z = -x / 2 + t) = 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
+# du(x, t; z = -x / 2 + t) = 15 / 2 * (tanh(z) + 1) * (3 * tanh(z) - 1) * sech(z)^2
+
+# bcs = [u(x, 0) ~ u_analytic(x, 0),
+#     u(-10, t) ~ u_analytic(-10, t),
+#     u(10, t) ~ u_analytic(10, t),
+#     Dx(u(-10, t)) ~ du(-10, t),
+#     Dx(u(10, t)) ~ du(10, t)]
+
+# # Space and time domains
+# domains = [x ∈ Interval(-10.0, 10.0),
+#     t ∈ Interval(0.0, 1.0)]
+
+# # Discretization
+# dx = 0.4;
+# dt = 0.2;
+
+# # Function to compute analytical solution at a specific point (x, t)
+# function u_analytic_point(x, t)
+#     z = -x / 2 + t
+#     return 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
+# end
 
-discretization = NeuralPDE.BayesianPINN([chain],
-    GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset, nothing])
+# # Function to generate the dataset matrix
+# function generate_dataset_matrix(domains, dx, dt)
+#     x_values = -10:dx:10
+#     t_values = 0.0:dt:1.0
 
-@named pde_system = PDESystem(eq,
-    bcs,
-    domains,
-    [x, t],
-    [u(x, t)],
-    [α],
-    defaults = Dict([α => 0.5]))
+#     dataset = []
 
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 100,
-    bcstd = [0.2, 0.2, 0.2, 0.2, 0.2],
-    phystd = [1.0], l2std = [0.05], param = [Distributions.LogNormal(0.5, 2)],
-    priorsNNw = (0.0, 10.0),
-    saveats = [1 / 100.0, 1 / 100.0], progress = true)
+#     for t in t_values
+#         for x in x_values
+#             u_value = u_analytic_point(x, t)
+#             push!(dataset, [u_value, x, t])
+#         end
+#     end
 
-eqs = pde_system.eqs
-Dict_differentials = Dict()
-exps = toexpr.(eqs)
-nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+#     return vcat([data' for data in dataset]...)
+# end
 
-sol2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 100,
-    bcstd = [0.2, 0.2, 0.2, 0.2, 0.2],
-    phystd = [1.0], phystdnew = [0.05], l2std = [0.05],
-    param = [Distributions.LogNormal(0.5, 2)],
-    priorsNNw = (0.0, 10.0),
-    saveats = [1 / 100.0, 1 / 100.0], Dict_differentials = Dict_differentials,
-    progress = true)
-
-phi = discretization.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain) for (d, dx) in zip(domains, [dx / 10, dt])]
-u_predict = [[first(pmean(phi([x, t], sol1.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-# p1 = plot(xs, u_predict, title = "predict")
-# p2 = plot(xs, u_real, title = "analytic")
-# p3 = plot(xs, diff_u, title = "error")
-# plot(p1, p2, p3)
-
-phi = discretization.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain) for (d, dx) in zip(domains, [dx / 10, dt])]
-u_predict = [[first(pmean(phi([x, t], sol2.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol2.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-# p1 = plot(xs, u_predict, title = "predict")
-# p2 = plot(xs, u_real, title = "analytic")
-# p3 = plot(xs, diff_u, title = "error")
-# plot(p1, p2, p3)
+# datasetpde = [generate_dataset_matrix(domains, dx, dt)]
+
+# # noise to dataset
+# noisydataset = deepcopy(datasetpde)
+# noisydataset[1][:, 1] = noisydataset[1][:, 1] .+
+#                         randn(size(noisydataset[1][:, 1])) .* 5 / 100 .*
+#                         noisydataset[1][:, 1]
+
+# # plot(datasetpde[1][:, 2], datasetpde[1][:, 1], title = "Dataset from Analytical Solution")
+# # plot!(noisydataset[1][:, 2], noisydataset[1][:, 1])
+
+# # Neural network
+# chain = Lux.Chain(Lux.Dense(2, 8, Lux.tanh),
+#     Lux.Dense(8, 8, Lux.tanh),
+#     Lux.Dense(8, 1))
+
+# discretization = NeuralPDE.BayesianPINN([chain],
+#     GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset, nothing])
+
+# @named pde_system = PDESystem(eq,
+#     bcs,
+#     domains,
+#     [x, t],
+#     [u(x, t)],
+#     [α],
+#     defaults = Dict([α => 0.5]))
+
+# sol1 = ahmc_bayesian_pinn_pde(pde_system,
+#     discretization;
+#     draw_samples = 100,
+#     bcstd = [0.2, 0.2, 0.2, 0.2, 0.2],
+#     phystd = [1.0], l2std = [0.05], param = [Distributions.LogNormal(0.5, 2)],
+#     priorsNNw = (0.0, 10.0),
+#     saveats = [1 / 100.0, 1 / 100.0], progress = true)
+
+# eqs = pde_system.eqs
+# Dict_differentials = Dict()
+# exps = toexpr.(eqs)
+# nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+# sol2 = ahmc_bayesian_pinn_pde(pde_system,
+#     discretization;
+#     draw_samples = 100,
+#     bcstd = [0.2, 0.2, 0.2, 0.2, 0.2],
+#     phystd = [1.0], phystdnew = [0.05], l2std = [0.05],
+#     param = [Distributions.LogNormal(0.5, 2)],
+#     priorsNNw = (0.0, 10.0),
+#     saveats = [1 / 100.0, 1 / 100.0], Dict_differentials = Dict_differentials,
+#     progress = true)
+
+# phi = discretization.phi[1]
+# xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+#           for (d, dx) in zip(domains, [dx / 10, dt])]
+# u_predict = [[first(pmean(phi([x, t], sol1.estimated_nn_params[1]))) for x in xs]
+#              for t in ts]
+# u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+# diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1.estimated_nn_params[1]))))
+#            for x in xs]
+#           for t in ts]
+
+# # p1 = plot(xs, u_predict, title = "predict")
+# # p2 = plot(xs, u_real, title = "analytic")
+# # p3 = plot(xs, diff_u, title = "error")
+# # plot(p1, p2, p3)
+
+# phi = discretization.phi[1]
+# xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+#           for (d, dx) in zip(domains, [dx / 10, dt])]
+# u_predict = [[first(pmean(phi([x, t], sol2.estimated_nn_params[1]))) for x in xs]
+#              for t in ts]
+# u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+# diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol2.estimated_nn_params[1]))))
+#            for x in xs]
+#           for t in ts]
+
+# # p1 = plot(xs, u_predict, title = "predict")
+# # p2 = plot(xs, u_real, title = "analytic")
+# # p3 = plot(xs, diff_u, title = "error")
+# # plot(p1, p2, p3)

From 014a11d6cb2a7fdcd688847cb3dc9ad03f975118 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Thu, 29 Feb 2024 00:12:00 +0530
Subject: [PATCH 027/107] corrected original and new implementation, comments

---
 src/PDE_BPINN.jl              |  96 ++++----
 src/discretize.jl             |  21 +-
 src/training_strategies.jl    |  11 +-
 test/BPINN_PDE_tests.jl       |  21 +-
 test/BPINN_PDEinvsol_tests.jl | 447 +---------------------------------
 5 files changed, 82 insertions(+), 514 deletions(-)

diff --git a/src/PDE_BPINN.jl b/src/PDE_BPINN.jl
index 4375621782..b18c35aa99 100644
--- a/src/PDE_BPINN.jl
+++ b/src/PDE_BPINN.jl
@@ -68,63 +68,66 @@ mutable struct PDELogTargetDensity{
     end
 end
 
-# for bc case, [bc]/bc eqs must be passed along with dataset_bc[i]
-# and final loss for bc must be together in a vector(bcs has separate type of dataset_bc)
-# eqs is vector of pde eqs and dataset here is dataset_pde
-# normally you get vector of losses
+# you get a vector of losses
 function get_lossy(pinnrep, dataset, Dict_differentials)
     eqs = pinnrep.eqs
-    depvars = pinnrep.depvars # order is same as dataset and interps
+    depvars = pinnrep.depvars #depvar order is same as dataset
 
     # Dict_differentials is filled with Differential operator => diff_i key-value pairs
     # masking operation
     eqs_new = substitute.(eqs, Ref(Dict_differentials))
 
     to_subs, tobe_subs = get_symbols(dataset, depvars, eqs)
-    # for each row in dataset create u values for substituting in equation, n_equations=n_rows
+
+    # for values of all depvars at corresponding indvar values in dataset, create dictionaries {Dict(x(t) => 1.0496435863173237, y(t) => 1.9227770685615337)}
+    # In each Dict, num form of depvar is key to its value at certain coords of indvars, n_dicts = n_rows_dataset(or n_indvar_coords_dataset)
     eq_subs = [Dict(tobe_subs[depvar] => to_subs[depvar][i] for depvar in depvars)
                for i in 1:size(dataset[1][:, 1])[1]]
 
-    # for each point(eq_sub dictionary) substiute in all equations(eqs_new - masked equations)
-    b = [[substitute(eq, eq_sub) for eq in eqs_new] for eq_sub in eq_subs]
-
-    # now we have vector of equation vectors
+    # for each dataset point(eq_sub dictionary), substitute in masked equations
+    # n_collocated_equations = n_rows_dataset(or n_indvar_coords_dataset)
+    masked_colloc_equations = [[substitute(eq, eq_sub) for eq in eqs_new]
+                            for eq_sub in eq_subs]
+    # now we have vector of dataset depvar's collocated equations
 
     # reverse dict for re-substituting values of Differential(t)(u(t)) etc
     rev_Dict_differentials = Dict(value => key for (key, value) in Dict_differentials)
 
-    # for each vector in vector of equation vectorbroadcast resubstituing OG mask values
-    c = [substitute.(b_i, Ref(rev_Dict_differentials)) for b_i in b]
+    # unmask Differential terms in masked_colloc_equations
+    colloc_equations = [substitute.(masked_colloc_equation, Ref(rev_Dict_differentials))
+                    for masked_colloc_equation in masked_colloc_equations]
 
-    # get losses, zip each equation with args for each build_loss call per equation vector
-    loss_functions = [[build_loss_function(pinnrep, eq, pde_indvar)
-                       for (eq, pde_indvar, integration_indvar) in zip(c[i],
+    # nested vector of datafree_pde_loss_functions (as in discretize.jl)
+    # each sub vector has dataset's indvar coord's datafree_colloc_loss_function, n_subvectors = n_rows_dataset(or n_indvar_coords_dataset)
+    # zip each colloc equation with args for each build_loss call per equation vector
+    datafree_colloc_loss_functions = [[build_loss_function(pinnrep, eq, pde_indvar)
+                       for (eq, pde_indvar, integration_indvar) in zip(colloc_equation,
         pinnrep.pde_indvars,
-        pinnrep.pde_integration_vars)] for i in eachindex(c)]
+        pinnrep.pde_integration_vars)] for colloc_equation in colloc_equations]
 
-    return loss_functions
+    return datafree_colloc_loss_functions
 end
 
-# dataset_pde has normal matrix format 
-# dataset_bc has format of Vector{typeof(dataset_pde )} as each bc has different domain requirements
 function get_symbols(dataset, depvars, eqs)
+    # take only values of depvars from dataset
     depvar_vals = [dataset_i[:, 1] for dataset_i in dataset]
-    # order of depvars
+    # order of pinnrep.depvars, depvar_vals, BayesianPINN.dataset must be same
     to_subs = Dict(depvars .=> depvar_vals)
 
-    asrt = Symbolics.get_variables.(eqs)
-    # want only symbols of depvars
-    temp = unique(reduce(vcat, asrt))
+    numform_vars = Symbolics.get_variables.(eqs)
+    Eq_vars = unique(reduce(vcat, numform_vars))
+    # got equation's depvar num format {x(t)} for use in substitute()
 
     tobe_subs = Dict()
     for a in depvars
-        for i in temp
+        for i in Eq_vars
             expr = toexpr(i)
             if (expr isa Expr) && (expr.args[1] == a)
                 tobe_subs[a] = i
             end
         end
     end
+    # depvar symbolic and num format got, tobe_subs : Dict{Any, Any}(:y => y(t), :x => x(t))
 
     return to_subs, tobe_subs
 end
@@ -373,37 +376,39 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
     newloss = if Dict_differentials isa Nothing
         nothing
     else
-        yuh1 = get_lossy(pinnrep, dataset_pde, Dict_differentials)
-        # eqs = pinnrep.bcs
-        # yuh2 = get_lossy(pinnrep, dataset_pde, eqs)
-
-        # consider all dataset domain points and for each row new set of equation loss function
-        # this is a vector of tuple{vector,nothing}
-        pde_loss_functions = [merge_strategy_with_loglikelihood_function(
-            pinnrep::PINNRepresentation,
+        datafree_colloc_loss_functions = get_lossy(pinnrep, dataset_pde, Dict_differentials)
+        # equals number of indvar coords in dataset
+        # add case for if parameters present in bcs?
+
+        train_sets_pde = get_dataset_train_points(pde_system.eqs,
+                dataset_pde,
+                pinnrep)
+        colloc_train_sets = [[hcat(train_sets_pde[i][:, j]...)' for i in eachindex(datafree_colloc_loss_functions[1])] for j in eachindex(datafree_colloc_loss_functions)]
+
+        # for each datafree_colloc_loss_function create loss_functions by passing dataset's indvar coords as train_sets_pde.
+        # placeholder strategy = GridTraining(0.1), datafree_bc_loss_function and train_sets_bc must be nothing
+        # order of indvar coords will be same as corresponding depvar coords values in dataset provided in get_lossy() call.
+        pde_loss_function_points = [merge_strategy_with_loglikelihood_function(
+            pinnrep,
             GridTraining(0.1),
-            yuh1[i],
+            datafree_colloc_loss_functions[i],
             nothing;
-            # pass transformation of each dataset row-corresponds to each point, for each depvar dataset point merged equation vector
-            train_sets_pde = get_dataset_train_points(pde_system.eqs,
-                [Array(data[i, :]') for data in dataset_pde],
-                pinnrep),
+            train_sets_pde = colloc_train_sets[i],
             train_sets_bc = nothing)
-                              for i in eachindex(yuh1)]
+                              for i in eachindex(datafree_colloc_loss_functions)]
 
         function L2_loss2(θ, allstd)
             stdpdesnew = allstd[4]
 
             # first vector of losses,from tuple -> pde losses, first[1] pde loss
-            pde_loglikelihoods = [[logpdf(Normal(0, stdpdesnew[j]), pde_loss_function(θ))
-                                   for (j, pde_loss_function) in enumerate(pde_loss_functions[i][1])]
-                                  for i in eachindex(pde_loss_functions)]
+            pde_loglikelihoods = [sum([pde_loss_function(θ, stdpdesnew[i])
+                                       for (i, pde_loss_function) in enumerate(pde_loss_functions[1])])
+                                  for pde_loss_functions in pde_loss_function_points]
 
-            # bc_loglikelihoods = [logpdf(Normal(0, stdbcs[j]), bc_loss_function(θ))
+            # bc_loglikelihoods = [sum([bc_loss_function(θ, stdpdesnew[i]) for (i, bc_loss_function) in enumerate(pde_loss_function_points[1])]) for pde_loss_function_points in pde_loss_functions]
             #                      for (j, bc_loss_function) in enumerate(bc_loss_functions)]
 
-            return sum(sum(pde_loglikelihoods))
-            # sum(sum(pde_loglikelihoods) + sum(bc_loglikelihoods))
+            return sum(pde_loglikelihoods)
         end
     end
 
@@ -437,9 +442,6 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
     # NN solutions for loglikelihood which is used for L2lossdata
     Φ = pinnrep.phi
 
-    # for new L2 loss
-    # discretization.additional_loss = 
-
     if nchains < 1
         throw(error("number of chains must be greater than or equal to 1"))
     end
diff --git a/src/discretize.jl b/src/discretize.jl
index dfb9b5174c..76378adbeb 100644
--- a/src/discretize.jl
+++ b/src/discretize.jl
@@ -608,7 +608,7 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem,
     end
 
     function get_likelihood_estimate_function(discretization::BayesianPINN)
-        # Because seperate reweighting code section needed and loglikelihood is pointwise independant
+        # Because separate reweighting code section needed and loglikelihood is pointwise independent
         pde_loss_functions, bc_loss_functions = merge_strategy_with_loglikelihood_function(
             pinnrep,
             strategy,
@@ -652,18 +652,19 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem,
         function full_loss_function(θ, allstd::Vector{Vector{Float64}})
             stdpdes, stdbcs, stdextra, stdpdesnew = allstd
             # the aggregation happens on cpu even if the losses are gpu, probably fine since it's only a few of them
-            # pde_loglikelihoods = sum([logpdf(MvNormal(pde_loss_function(θ)[1, :], LinearAlgebra.Diagonal(abs2.(stdpdes[i] .* ones(pde_loss_length[i])))), zeros(pde_loss_length[i])) for (i, pde_loss_function) in enumerate(pde_loss_functions)])
-            pde_loglikelihoods = sum([pde_loss_function(θ, stdpdes[i]) for (i, pde_loss_function) in enumerate(pde_loss_functions)])
-            # bc_loglikelihoods = sum([logpdf(MvNormal(bc_loss_function(θ)[1, :], LinearAlgebra.Diagonal(abs2.(stdbcs[j] .* ones(bc_loss_length[j])))), zeros(bc_loss_length[j])) for (j, bc_loss_function) in enumerate(bc_loss_functions)])
-            bc_loglikelihoods = sum([bc_loss_function(θ, stdbcs[j]) for (j, bc_loss_function) in enumerate(bc_loss_functions)])
+
+            pde_loglikelihoods = sum([pde_loss_function(θ, stdpdes[i])
+                                      for (i, pde_loss_function) in enumerate(pde_loss_functions)])
+            bc_loglikelihoods = sum([bc_loss_function(θ, stdbcs[j])
+                                     for (j, bc_loss_function) in enumerate(bc_loss_functions)])
+
             if !(datapde_loss_functions isa Nothing)
-                pde_loglikelihoods += sum([datapde_loss_function(θ, stdpdes[i]) for (i, datapde_loss_function) in enumerate(datapde_loss_functions)])
-                # sum([logpdf(MvNormal(datapde_loss_function(θ)[1, :], LinearAlgebra.Diagonal(abs2.(stdpdes[i] .* ones(datapde_length[i])))), zeros(datapde_length[i])) for (i, datapde_loss_function) in enumerate(datapde_loss_functions)])
+                pde_loglikelihoods += sum([datapde_loss_function(θ, stdpdes[i])
+                                           for (i, datapde_loss_function) in enumerate(datapde_loss_functions)])
             end
-
             if !(databc_loss_functions isa Nothing)
-                bc_loglikelihoods += sum([databc_loss_function(θ, stdbcs[j]) for (j, databc_loss_function) in enumerate(databc_loss_functions)])
-                # sum([logpdf(MvNormal(databc_loss_function(θ)[1, :], LinearAlgebra.Diagonal(abs2.(stdbcs[j] .* ones(databc_length[j])))), zeros(databc_length[j])) for (j, databc_loss_function) in enumerate(databc_loss_functions)])
+                bc_loglikelihoods += sum([databc_loss_function(θ, stdbcs[j])
+                                          for (j, databc_loss_function) in enumerate(databc_loss_functions)])
             end
 
             # this is kind of a hack, and means that whenever the outer function is evaluated the increment goes up, even if it's not being optimized
diff --git a/src/training_strategies.jl b/src/training_strategies.jl
index 9fdff04f06..660b7cb2fb 100644
--- a/src/training_strategies.jl
+++ b/src/training_strategies.jl
@@ -29,10 +29,6 @@ function get_dataset_train_points(eqs, train_sets, pinnrep)
 
     # train_sets follows order of depvars
     # take dataset indvar values if for equations depvar's indvar matches input symbol indvar
-    # points =  [vcat([train_sets[i][:, 2:end]'
-    #                 for i in eachindex(symbols_input) if symbols_input[i][2] == eq_arg]...)
-    #           for eq_arg in eq_args]
-
     points = []
     for eq_arg in eq_args
         eq_points = []
@@ -95,8 +91,13 @@ end
 
 function get_points_loss_functions(loss_function, train_set, eltypeθ, strategy::GridTraining;
         τ = nothing)
+        # loss_function length is number of all points loss is being evaluated upon
+        # train sets rows are for each indvar, cols are coordinates (row_1,row_2,..row_n) at which loss evaluated
     function loss(θ, std)
-        logpdf(MvNormal(loss_function(train_set, θ)[1, :], LinearAlgebra.Diagonal(abs2.(std .* ones(length(train_set))))), zeros(length(train_set)))
+        logpdf(
+            MvNormal(loss_function(train_set, θ)[1, :],
+                LinearAlgebra.Diagonal(abs2.(std .* ones(size(train_set)[2])))),
+            zeros(size(train_set)[2]))
     end
 end
 
diff --git a/test/BPINN_PDE_tests.jl b/test/BPINN_PDE_tests.jl
index 6dd3637f5a..7923a14daf 100644
--- a/test/BPINN_PDE_tests.jl
+++ b/test/BPINN_PDE_tests.jl
@@ -8,7 +8,7 @@ using Flux
 
 Random.seed!(100)
 
-@testset "Example 1: 2D Periodic System" begin
+@testset "Example 1: 1D Periodic System" begin
     # Cos(pi*t) example
     @parameters t
     @variables u(..)
@@ -35,8 +35,9 @@ Random.seed!(100)
     ts = vec(sol1.timepoints[1])
     u_real = [analytic_sol_func(0.0, t) for t in ts]
     u_predict = pmean(sol1.ensemblesol[1])
-    @test u_predict≈u_real atol=0.5
-    @test mean(u_predict .- u_real) < 0.1
+
+    @test u_predict≈u_real atol=0.05
+    @test mean(u_predict .- u_real) < 0.001
 end
 
 @testset "Example 2: 1D ODE" begin
@@ -67,13 +68,13 @@ end
         bcstd = [0.1],
         phystd = [0.05],
         priorsNNw = (0.0, 10.0),
-        saveats = [1 / 100.0])
+        saveats = [1 / 100.0], progress=true)
 
     analytic_sol_func(t) = exp(-(t^2) / 2) / (1 + t + t^3) + t^2
     ts = sol1.timepoints[1]
     u_real = vec([analytic_sol_func(t) for t in ts])
     u_predict = pmean(sol1.ensemblesol[1])
-    @test u_predict≈u_real atol=0.8
+    @test u_predict≈u_real atol=0.5
 end
 
 @testset "Example 3: 3rd Degree ODE" begin
@@ -156,9 +157,9 @@ end
     sol1 = ahmc_bayesian_pinn_pde(pde_system,
         discretization;
         draw_samples = 200,
-        bcstd = [0.003, 0.003, 0.003, 0.003],
-        phystd = [0.003],
-        priorsNNw = (0.0, 10.0),
+        bcstd = [0.01, 0.01, 0.01, 0.01],
+        phystd = [0.005],
+        priorsNNw = (0.0, 2.0),
         saveats = [1 / 100.0, 1 / 100.0])
 
     xs = sol1.timepoints[1]
@@ -166,7 +167,7 @@ end
 
     u_predict = pmean(sol1.ensemblesol[1])
     u_real = [analytic_sol_func(xs[:, i][1], xs[:, i][2]) for i in 1:length(xs[1, :])]
-    @test u_predict≈u_real atol=1.5
+    @test u_predict≈u_real atol=0.8
 end
 
 @testset "Translating from Flux" begin
@@ -204,5 +205,5 @@ end
     ts = sol1.timepoints[1]
     u_real = vec([analytic_sol_func(t) for t in ts])
     u_predict = pmean(sol1.ensemblesol[1])
-    @test u_predict≈u_real atol=0.8
+    @test u_predict≈u_real atol=0.1
 end
diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index 250f98810b..837a5cc79b 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -7,7 +7,7 @@ using ComponentArrays, ModelingToolkit
 
 Random.seed!(100)
 
-@testset "Example 1: 2D Periodic System with parameter estimation" begin
+@testset "Example 1: 1D Periodic System with parameter estimation" begin
     # Cos(pi*t) periodic curve
     @parameters t, p
     @variables u(..)
@@ -88,9 +88,9 @@ Random.seed!(100)
     u_real = [analytic_sol_func1(0.0, t) for t in ts]
     u_predict = pmean(sol1.ensemblesol[1])
 
-    @test u_predict≈u_real atol=1.5
-    @test mean(u_predict .- u_real) < 0.1
-    @test sol1.estimated_de_params[1]≈param atol=param * 0.3
+    @test u_predict≈u_real atol=0.1
+    @test mean(u_predict .- u_real) < 0.01
+    @test sol1.estimated_de_params[1]≈param atol=0.1
 end
 
 @testset "Example 2: Lorenz System with parameter estimation" begin
@@ -152,441 +152,4 @@ end
     p_ = sol1.estimated_de_params[1]
     @test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
     # @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
-end
-
-function recur_expression(exp, Dict_differentials)
-    for in_exp in exp.args
-        if !(in_exp isa Expr)
-            # skip +,== symbols, characters etc
-            continue
-
-        elseif in_exp.args[1] isa ModelingToolkit.Differential
-            # first symbol of differential term
-            # Dict_differentials for masking differential terms
-            # and resubstituting differentials in equations after putting in interpolations
-            # temp = in_exp.args[end]
-            Dict_differentials[eval(in_exp)] = Symbolics.variable("diff_$(length(Dict_differentials) + 1)")
-            return
-        else
-            recur_expression(in_exp, Dict_differentials)
-        end
-    end
-end
-
-println("Example 3: 2D Periodic System with New parameter estimation")
-@parameters t, p
-@variables u(..)
-
-Dt = Differential(t)
-eqs = Dt(u(t)) - cos(p * t) ~ 0
-bcs = [u(0) ~ 0.0]
-domains = [t ∈ Interval(0.0, 2.0)]
-
-chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
-initl, st = Lux.setup(Random.default_rng(), chainl)
-
-@named pde_system = PDESystem(eqs,
-    bcs,
-    domains,
-    [t],
-    [u(t)],
-    [p],
-    defaults = Dict([p => 4.0]))
-
-analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
-timepoints = collect(0.0:(1 / 100.0):2.0)
-u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
-u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
-dataset = [hcat(u1, timepoints)]
-
-discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
-    dataset = [dataset, nothing])
-
-# creating dictionary for masking equations
-eqs = pde_system.eqs
-Dict_differentials = Dict()
-exps = toexpr.(eqs)
-nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 1500,
-    bcstd = [0.05],
-    phystd = [0.01], l2std = [0.01],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 50.0],
-    param = [LogNormal(6.0, 0.5)],
-    Dict_differentials = Dict_differentials)
-
-param = 2 * π
-ts = vec(sol1.timepoints[1])
-u_real = [analytic_sol_func1(0.0, t) for t in ts]
-u_predict = pmean(sol1.ensemblesol[1])
-
-@test u_predict≈u_real atol=1.5
-@test mean(u_predict .- u_real) < 0.1
-@test sol1.estimated_de_params[1]≈param atol=param * 0.3
-
-println("Example 3: Lotka Volterra with New parameter estimation")
-@parameters t α β γ δ
-@variables x(..) y(..)
-
-Dt = Differential(t)
-eqs = [Dt(x(t)) ~ α * x(t) - β * x(t) * y(t), Dt(y(t)) ~ -γ * y(t) + δ * x(t) * y(t)]
-bcs = [x(0) ~ 1.0, y(0) ~ 1.0]
-domains = [t ∈ Interval(0.0, 6.0)]
-
-# Define the parameters' values
-# params = [α => 1.0, β => 0.5, γ => 0.5, δ => 1.0]
-# p = [1.5, 1.0, 3.0, 1.0]
-
-chainl = [
-    Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),
-        Lux.Dense(6, 1)),
-    Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),
-        Lux.Dense(6, 1))
-]
-
-initl, st = Lux.setup(Random.default_rng(), chainl[1])
-initl1, st1 = Lux.setup(Random.default_rng(), chainl[2])
-
-@named pde_system = PDESystem(eqs,
-    bcs,
-    domains,
-    [t],
-    [x(t), y(t)],
-    [α, β, γ, δ],
-    defaults = Dict([α => 5, β => 0, γ => 0.5, δ => 2]))
-
-using NeuralPDE, Lux, OrdinaryDiffEq, Distributions, Random
-
-function lotka_volterra(u, p, t)
-    α, β, γ, δ = p
-    x, y = u
-    dx = (α - β * y) * x
-    dy = (δ * x - γ) * y
-    return [dx, dy]
-end
-
-# initial-value problem.
-u0 = [1.0, 1.0]
-p = [1.5, 1.0, 3.0, 1.0]
-tspan = (0.0, 4.0)
-prob = ODEProblem(lotka_volterra, u0, tspan, p)
-
-# Solve using OrdinaryDiffEq.jl solver
-dt = 0.05
-solution = solve(prob, Tsit5(); saveat = dt)
-
-# Extract solution
-time = solution.t
-u = hcat(solution.u...)
-u1 = u .+ ((0.3 .* randn(size(u))) .* u)
-
-# using Plots, StatsPlots
-# plotly()
-# plot(time, u[1, :])
-# plot!(time, u[2, :])
-# Construct dataset
-dataset = [hcat(u1[i, :], time) for i in 1:2]
-
-discretization = BayesianPINN(chainl, GridTraining(0.01), param_estim = true,
-    dataset = [dataset, nothing])
-
-# creating dictionary for masking equations
-eqs = pde_system.eqs
-Dict_differentials = Dict()
-exps = toexpr.(eqs)
-nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-sol2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 500,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 3.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(0, 2),
-        Normal(0, 2),
-        Normal(0, 2),
-        Normal(0, 2)
-    ])
-
-# plot(sol2.timepoints[1]', sol2.ensemblesol[1])
-# plot!(sol2.timepoints[2]', sol2.ensemblesol[2])
-
-# plot(time, chainl[1](time', sol2.estimated_nn_params[1], st)[1])
-# plot!(time, chainl[2](time', sol2.estimated_nn_params[2], st)[1])
-
-# sol1 = ahmc_bayesian_pinn_pde(pde_system,
-#     discretization;
-#     draw_samples = 500,
-#     bcstd = [0.1, 0.1],
-#     phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-#     phystdnew = [1, 1],
-#     priorsNNw = (0.0, 3.0),
-#     saveats = [1 / 50.0],
-#     # Kernel = AdvancedHMC.NUTS(0.8),
-#     param = [
-#         Normal(1, 2),
-#         Normal(2, 2),
-#         Normal(2, 2),
-#         Normal(0, 2)
-#     ], Dict_differentials = Dict_differentials, progress = true)
-
-# # plot(time', chainl[1](time', sol1.estimated_nn_params[1], st)[1])
-# # plot!(time, chainl[2](time', sol1.estimated_nn_params[2], st)[1])
-
-# sol3 = ahmc_bayesian_pinn_pde(pde_system,
-#     discretization;
-#     draw_samples = 500,
-#     bcstd = [0.05, 0.05],
-#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
-#     phystdnew = [0.5, 0.5],
-#     #  Kernel = AdvancedHMC.NUTS(0.8),
-#     priorsNNw = (0.0, 10.0),
-#     saveats = [1 / 50.0],
-#     param = [
-#         Normal(0.0, 2),
-#         Normal(0.0, 2),
-#         Normal(0.0, 2),
-#         Normal(0.0, 2)
-#     ],
-#     Dict_differentials = Dict_differentials, progress = true)
-
-# sol = ahmc_bayesian_pinn_pde(pde_system,
-#     discretization;
-#     draw_samples = 500,
-#     bcstd = [0.05, 0.05],
-#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
-#     priorsNNw = (0.0, 10.0),
-#     saveats = [1 / 50.0],
-#     # Kernel = AdvancedHMC.NUTS(0.8),
-#     param = [
-#         Normal(1.0, 2),
-#         Normal(1.0, 2),
-#         Normal(1.0, 2),
-#         Normal(1.0, 2)
-#     ], progress = true)
-
-# plot!(sol.timepoints[1]', sol.ensemblesol[1])
-# plot!(sol.timepoints[2]', sol.ensemblesol[2])
-
-# sol1 = ahmc_bayesian_pinn_pde(pde_system,
-#     discretization;
-#     draw_samples = 500,
-#     bcstd = [0.05, 0.05],
-#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
-#     phystdnew = [0.5, 0.5],
-#     #  Kernel = AdvancedHMC.NUTS(0.8),
-#     priorsNNw = (0.0, 10.0),
-#     saveats = [1 / 50.0],
-#     param = [
-#         Normal(1.0, 2),
-#         Normal(1.0, 2),
-#         Normal(1.0, 2),
-#         Normal(1.0, 2)
-#     ],
-#     Dict_differentials = Dict_differentials, progress = true)
-
-# plot!(sol1.timepoints[1]', sol1.ensemblesol[1])
-# plot!(sol1.timepoints[2]', sol1.ensemblesol[2])
-
-sol = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 500,
-    bcstd = [0.05, 0.05],
-    phystd = [0.005, 0.005], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 10.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(1.0, 2),
-        Normal(1.0, 2),
-        Normal(1.0, 2),
-        Normal(1.0, 2)
-    ])
-
-# plot!(sol.timepoints[1]', sol.ensemblesol[1])
-# plot!(sol.timepoints[2]', sol.ensemblesol[2])
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 500,
-    bcstd = [0.05, 0.05],
-    phystd = [0.005, 0.005], l2std = [0.1, 0.1],
-    phystdnew = [0.5, 0.5],
-    #  Kernel = AdvancedHMC.NUTS(0.8),
-    priorsNNw = (0.0, 10.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1.0, 2),
-        Normal(1.0, 2),
-        Normal(1.0, 2),
-        Normal(1.0, 2)
-    ],
-    Dict_differentials = Dict_differentials)
-
-param = 2 * π
-ts = vec(sol1.timepoints[1])
-u_real = [analytic_sol_func1(0.0, t) for t in ts]
-u_predict = pmean(sol1.ensemblesol[1])
-
-@test u_predict≈u_real atol=1.5
-@test mean(u_predict .- u_real) < 0.1
-@test sol1.estimated_de_params[1]≈param atol=param * 0.3
-
-# points1 = []
-# for eq_arg in eq_args
-#     a = []
-#     # for each (depvar,[indvar1..]) if indvari==indvar (eq_arg)
-#     for i in eachindex(symbols_input)
-#         if symbols_input[i][2] == eq_arg
-#             # include domain points of that depvar
-#             # each loss equation take domain matrix [points..;points..]
-#             push!(a, train_sets[i][:, 2:end]')
-#         end
-#     end
-#     # vcat as new row for next equation
-#     push!(points1, vcat(a...))
-# end
-# println(points1 == points)
-
-# using NeuralPDE, Flux, Lux, ModelingToolkit, LinearAlgebra, AdvancedHMC
-# import ModelingToolkit: Interval, infimum, supremum, Distributions
-# using Plots, MonteCarloMeasurements
-
-# @parameters x, t, α
-# @variables u(..)
-# Dt = Differential(t)
-# Dx = Differential(x)
-# Dx2 = Differential(x)^2
-# Dx3 = Differential(x)^3
-# Dx4 = Differential(x)^4
-
-# # α = 1
-# β = 4
-# γ = 1
-# eq = Dt(u(x, t)) + u(x, t) * Dx(u(x, t)) + α * Dx2(u(x, t)) + β * Dx3(u(x, t)) + γ * Dx4(u(x, t)) ~ 0
-
-# u_analytic(x, t; z = -x / 2 + t) = 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
-# du(x, t; z = -x / 2 + t) = 15 / 2 * (tanh(z) + 1) * (3 * tanh(z) - 1) * sech(z)^2
-
-# bcs = [u(x, 0) ~ u_analytic(x, 0),
-#     u(-10, t) ~ u_analytic(-10, t),
-#     u(10, t) ~ u_analytic(10, t),
-#     Dx(u(-10, t)) ~ du(-10, t),
-#     Dx(u(10, t)) ~ du(10, t)]
-
-# # Space and time domains
-# domains = [x ∈ Interval(-10.0, 10.0),
-#     t ∈ Interval(0.0, 1.0)]
-
-# # Discretization
-# dx = 0.4;
-# dt = 0.2;
-
-# # Function to compute analytical solution at a specific point (x, t)
-# function u_analytic_point(x, t)
-#     z = -x / 2 + t
-#     return 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
-# end
-
-# # Function to generate the dataset matrix
-# function generate_dataset_matrix(domains, dx, dt)
-#     x_values = -10:dx:10
-#     t_values = 0.0:dt:1.0
-
-#     dataset = []
-
-#     for t in t_values
-#         for x in x_values
-#             u_value = u_analytic_point(x, t)
-#             push!(dataset, [u_value, x, t])
-#         end
-#     end
-
-#     return vcat([data' for data in dataset]...)
-# end
-
-# datasetpde = [generate_dataset_matrix(domains, dx, dt)]
-
-# # noise to dataset
-# noisydataset = deepcopy(datasetpde)
-# noisydataset[1][:, 1] = noisydataset[1][:, 1] .+
-#                         randn(size(noisydataset[1][:, 1])) .* 5 / 100 .*
-#                         noisydataset[1][:, 1]
-
-# # plot(datasetpde[1][:, 2], datasetpde[1][:, 1], title = "Dataset from Analytical Solution")
-# # plot!(noisydataset[1][:, 2], noisydataset[1][:, 1])
-
-# # Neural network
-# chain = Lux.Chain(Lux.Dense(2, 8, Lux.tanh),
-#     Lux.Dense(8, 8, Lux.tanh),
-#     Lux.Dense(8, 1))
-
-# discretization = NeuralPDE.BayesianPINN([chain],
-#     GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset, nothing])
-
-# @named pde_system = PDESystem(eq,
-#     bcs,
-#     domains,
-#     [x, t],
-#     [u(x, t)],
-#     [α],
-#     defaults = Dict([α => 0.5]))
-
-# sol1 = ahmc_bayesian_pinn_pde(pde_system,
-#     discretization;
-#     draw_samples = 100,
-#     bcstd = [0.2, 0.2, 0.2, 0.2, 0.2],
-#     phystd = [1.0], l2std = [0.05], param = [Distributions.LogNormal(0.5, 2)],
-#     priorsNNw = (0.0, 10.0),
-#     saveats = [1 / 100.0, 1 / 100.0], progress = true)
-
-# eqs = pde_system.eqs
-# Dict_differentials = Dict()
-# exps = toexpr.(eqs)
-# nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-# sol2 = ahmc_bayesian_pinn_pde(pde_system,
-#     discretization;
-#     draw_samples = 100,
-#     bcstd = [0.2, 0.2, 0.2, 0.2, 0.2],
-#     phystd = [1.0], phystdnew = [0.05], l2std = [0.05],
-#     param = [Distributions.LogNormal(0.5, 2)],
-#     priorsNNw = (0.0, 10.0),
-#     saveats = [1 / 100.0, 1 / 100.0], Dict_differentials = Dict_differentials,
-#     progress = true)
-
-# phi = discretization.phi[1]
-# xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-#           for (d, dx) in zip(domains, [dx / 10, dt])]
-# u_predict = [[first(pmean(phi([x, t], sol1.estimated_nn_params[1]))) for x in xs]
-#              for t in ts]
-# u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-# diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1.estimated_nn_params[1]))))
-#            for x in xs]
-#           for t in ts]
-
-# # p1 = plot(xs, u_predict, title = "predict")
-# # p2 = plot(xs, u_real, title = "analytic")
-# # p3 = plot(xs, diff_u, title = "error")
-# # plot(p1, p2, p3)
-
-# phi = discretization.phi[1]
-# xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-#           for (d, dx) in zip(domains, [dx / 10, dt])]
-# u_predict = [[first(pmean(phi([x, t], sol2.estimated_nn_params[1]))) for x in xs]
-#              for t in ts]
-# u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-# diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol2.estimated_nn_params[1]))))
-#            for x in xs]
-#           for t in ts]
-
-# # p1 = plot(xs, u_predict, title = "predict")
-# # p2 = plot(xs, u_real, title = "analytic")
-# # p3 = plot(xs, diff_u, title = "error")
-# # plot(p1, p2, p3)
+end
\ No newline at end of file

From 11bbba7d4cd11fe0b0c5cf0886307955e603f550 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Thu, 29 Feb 2024 00:44:59 +0530
Subject: [PATCH 028/107] update BPINN_ode, BPINN_PDE_tests

---
 src/BPINN_ode.jl        | 3 ---
 test/BPINN_PDE_tests.jl | 2 +-
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/BPINN_ode.jl b/src/BPINN_ode.jl
index 7e39c24eef..f092d043c5 100644
--- a/src/BPINN_ode.jl
+++ b/src/BPINN_ode.jl
@@ -217,9 +217,6 @@ function DiffEqBase.__solve(prob::DiffEqBase.ODEProblem,
 
     if chain isa Lux.AbstractExplicitLayer
         θinit, st = Lux.setup(Random.default_rng(), chain)
-        println(length(θinit))
-        println(length(samples[1]))
-        println(draw_samples)
         θ = [vector_to_parameters(samples[i][1:(end - ninv)], θinit)
              for i in 1:max(draw_samples - draw_samples ÷ 10, draw_samples - 1000)]
         
diff --git a/test/BPINN_PDE_tests.jl b/test/BPINN_PDE_tests.jl
index 7923a14daf..f5ef2e7706 100644
--- a/test/BPINN_PDE_tests.jl
+++ b/test/BPINN_PDE_tests.jl
@@ -68,7 +68,7 @@ end
         bcstd = [0.1],
         phystd = [0.05],
         priorsNNw = (0.0, 10.0),
-        saveats = [1 / 100.0], progress=true)
+        saveats = [1 / 100.0])
 
     analytic_sol_func(t) = exp(-(t^2) / 2) / (1 + t + t^3) + t^2
     ts = sol1.timepoints[1]

From 908cb5be4d3fc08cd88a43f917c4a7efd39d7e8c Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Thu, 29 Feb 2024 02:33:41 +0530
Subject: [PATCH 029/107] update BPINN_PDE_tests.jl

---
 test/BPINN_PDE_tests.jl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/BPINN_PDE_tests.jl b/test/BPINN_PDE_tests.jl
index f5ef2e7706..325ebc6f99 100644
--- a/test/BPINN_PDE_tests.jl
+++ b/test/BPINN_PDE_tests.jl
@@ -36,7 +36,7 @@ Random.seed!(100)
     u_real = [analytic_sol_func(0.0, t) for t in ts]
     u_predict = pmean(sol1.ensemblesol[1])
 
-    @test u_predict≈u_real atol=0.05
+    @test u_predict≈u_real atol=0.08
     @test mean(u_predict .- u_real) < 0.001
 end
 
@@ -199,11 +199,11 @@ end
         bcstd = [0.1],
         phystd = [0.05],
         priorsNNw = (0.0, 10.0),
-        saveats = [1 / 100.0])
+        saveats = [1 / 100.0],progress=true)
 
     analytic_sol_func(t) = exp(-(t^2) / 2) / (1 + t + t^3) + t^2
     ts = sol1.timepoints[1]
     u_real = vec([analytic_sol_func(t) for t in ts])
     u_predict = pmean(sol1.ensemblesol[1])
-    @test u_predict≈u_real atol=0.1
+    @test u_predict≈u_real atol=0.5
 end

From 585a4f5717deaae48b124eb9ebdf07d3c03d7562 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Thu, 29 Feb 2024 10:33:18 +0530
Subject: [PATCH 030/107] update BPINN_PDE_tests.jl

---
 test/BPINN_PDE_tests.jl | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/test/BPINN_PDE_tests.jl b/test/BPINN_PDE_tests.jl
index 325ebc6f99..827b75cd6e 100644
--- a/test/BPINN_PDE_tests.jl
+++ b/test/BPINN_PDE_tests.jl
@@ -25,10 +25,10 @@ Random.seed!(100)
 
     sol1 = ahmc_bayesian_pinn_pde(pde_system,
         discretization;
-        draw_samples = 1500,
+        draw_samples = 2000,
         bcstd = [0.02],
         phystd = [0.01],
-        priorsNNw = (0.0, 1.0),
+        priorsNNw = (0.0, 10.0),
         saveats = [1 / 50.0])
 
     analytic_sol_func(u0, t) = u0 + sin(2 * π * t) / (2 * π)
@@ -36,8 +36,8 @@ Random.seed!(100)
     u_real = [analytic_sol_func(0.0, t) for t in ts]
     u_predict = pmean(sol1.ensemblesol[1])
 
-    @test u_predict≈u_real atol=0.08
-    @test mean(u_predict .- u_real) < 0.001
+    @test u_predict≈u_real atol=0.05
+    @test mean(u_predict .- u_real) < 1e-5
 end
 
 @testset "Example 2: 1D ODE" begin
@@ -199,7 +199,7 @@ end
         bcstd = [0.1],
         phystd = [0.05],
         priorsNNw = (0.0, 10.0),
-        saveats = [1 / 100.0],progress=true)
+        saveats = [1 / 100.0])
 
     analytic_sol_func(t) = exp(-(t^2) / 2) / (1 + t + t^3) + t^2
     ts = sol1.timepoints[1]

From cf77408d7f80e0163d2fc1195b5508d8bbdf7058 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Tue, 26 Mar 2024 19:54:59 +0530
Subject: [PATCH 031/107] done for now

---
 src/BPINN_ode.jl              | 12 +++--
 src/PDE_BPINN.jl              |  4 +-
 test/BPINN_PDE_tests.jl       |  2 +-
 test/BPINN_PDEinvsol_tests.jl | 94 ++++++++++++++++++++++++++++++++++-
 test/bpinnexperimental.jl     | 82 +++++++++++++++++++-----------
 5 files changed, 156 insertions(+), 38 deletions(-)

diff --git a/src/BPINN_ode.jl b/src/BPINN_ode.jl
index f092d043c5..e1567bf4e2 100644
--- a/src/BPINN_ode.jl
+++ b/src/BPINN_ode.jl
@@ -100,6 +100,8 @@ struct BNNODE{C, K, IT <: NamedTuple,
     init_params::I
     Adaptorkwargs::A
     Integratorkwargs::IT
+    numensemble::Int64
+    estim_collocate::Bool
     autodiff::Bool
     progress::Bool
     verbose::Bool
@@ -112,6 +114,8 @@ function BNNODE(chain, Kernel = HMC; strategy = nothing, draw_samples = 2000,
         Metric = DiagEuclideanMetric,
         targetacceptancerate = 0.8),
     Integratorkwargs = (Integrator = Leapfrog,),
+    numensemble = floor(Int, alg.draw_samples / 3),
+    estim_collocate = false,
     autodiff = false, progress = false, verbose = false)
     !(chain isa Lux.AbstractExplicitLayer) && (chain = Lux.transform(chain))
     BNNODE(chain, Kernel, strategy,
@@ -119,6 +123,7 @@ function BNNODE(chain, Kernel = HMC; strategy = nothing, draw_samples = 2000,
         phystd, dataset, physdt, MCMCkwargs,
         nchains, init_params,
         Adaptorkwargs, Integratorkwargs,
+        numensemble, estim_collocate,
         autodiff, progress, verbose)
 end
 
@@ -177,13 +182,12 @@ function DiffEqBase.__solve(prob::DiffEqBase.ODEProblem,
     reltol = 1.0f-3,
     verbose = false,
     saveat = 1 / 50.0,
-    maxiters = nothing,
-    numensemble = floor(Int, alg.draw_samples / 3),
-    estim_collocate = false)
+    maxiters = nothing)
+
     @unpack chain, l2std, phystd, param, priorsNNw, Kernel, strategy,
     draw_samples, dataset, init_params,
     nchains, physdt, Adaptorkwargs, Integratorkwargs,
-    MCMCkwargs, autodiff, progress, verbose = alg
+    MCMCkwargs, numensemble, estim_collocate, autodiff, progress, verbose = alg
 
     # ahmc_bayesian_pinn_ode needs param=[] for easier vcat operation for full vector of parameters
     param = param === nothing ? [] : param
diff --git a/src/PDE_BPINN.jl b/src/PDE_BPINN.jl
index b18c35aa99..516ecdcb97 100644
--- a/src/PDE_BPINN.jl
+++ b/src/PDE_BPINN.jl
@@ -394,7 +394,7 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
             datafree_colloc_loss_functions[i],
             nothing;
             train_sets_pde = colloc_train_sets[i],
-            train_sets_bc = nothing)
+            train_sets_bc = nothing)[1]
                               for i in eachindex(datafree_colloc_loss_functions)]
 
         function L2_loss2(θ, allstd)
@@ -402,7 +402,7 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
 
             # first vector of losses,from tuple -> pde losses, first[1] pde loss
             pde_loglikelihoods = [sum([pde_loss_function(θ, stdpdesnew[i])
-                                       for (i, pde_loss_function) in enumerate(pde_loss_functions[1])])
+                                       for (i, pde_loss_function) in enumerate(pde_loss_functions)])
                                   for pde_loss_functions in pde_loss_function_points]
 
             # bc_loglikelihoods = [sum([bc_loss_function(θ, stdpdesnew[i]) for (i, bc_loss_function) in enumerate(pde_loss_function_points[1])]) for pde_loss_function_points in pde_loss_functions]
diff --git a/test/BPINN_PDE_tests.jl b/test/BPINN_PDE_tests.jl
index 827b75cd6e..98edd13fa0 100644
--- a/test/BPINN_PDE_tests.jl
+++ b/test/BPINN_PDE_tests.jl
@@ -37,7 +37,7 @@ Random.seed!(100)
     u_predict = pmean(sol1.ensemblesol[1])
 
     @test u_predict≈u_real atol=0.05
-    @test mean(u_predict .- u_real) < 1e-5
+    @test mean(u_predict .- u_real) < 1e-3
 end
 
 @testset "Example 2: 1D ODE" begin
diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index 837a5cc79b..7e2356c5bc 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -152,4 +152,96 @@ end
     p_ = sol1.estimated_de_params[1]
     @test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
     # @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
-end
\ No newline at end of file
+end
+
+function recur_expression(exp, Dict_differentials)
+    for in_exp in exp.args
+        if !(in_exp isa Expr)
+            # skip +,== symbols, characters etc
+            continue
+
+        elseif in_exp.args[1] isa ModelingToolkit.Differential
+            # first symbol of differential term
+            # Dict_differentials for masking differential terms
+            # and resubstituting differentials in equations after putting in interpolations
+            # temp = in_exp.args[end]
+            Dict_differentials[eval(in_exp)] = Symbolics.variable("diff_$(length(Dict_differentials) + 1)")
+            return
+        else
+            recur_expression(in_exp, Dict_differentials)
+        end
+    end
+end
+
+println("Example 3: 2D Periodic System with New parameter estimation")
+@parameters t, p
+@variables u(..)
+
+Dt = Differential(t)
+eqs = Dt(u(t)) - cos(p * t) * u(t) ~ 0
+bcs = [u(0) ~ 0.0]
+domains = [t ∈ Interval(0.0, 2.0)]
+
+chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
+initl, st = Lux.setup(Random.default_rng(), chainl)
+
+@named pde_system = PDESystem(eqs,
+    bcs,
+    domains,
+    [t],
+    [u(t)],
+    [p],
+    defaults = Dict([p => 4.0]))
+
+analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
+timepoints = collect(0.0:(1 / 100.0):2.0)
+u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
+u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
+dataset = [hcat(u1, timepoints)]
+
+discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
+    dataset = [dataset, nothing])
+
+# creating dictionary for masking equations
+eqs = pde_system.eqs
+Dict_differentials = Dict()
+exps = toexpr.(eqs)
+nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 1500,
+    bcstd = [0.05],
+    phystd = [0.01], l2std = [0.01], phystdnew = [0.05],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 50.0],
+    param = [LogNormal(6.0, 0.5)],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+sol2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 1500,
+    bcstd = [0.05],
+    phystd = [0.01], l2std = [0.01],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 50.0],
+    param = [LogNormal(6.0, 0.5)],
+    progress = true)
+
+param = 2 * π
+ts = vec(sol1.timepoints[1])
+u_real = [analytic_sol_func1(0.0, t) for t in ts]
+u_predict = pmean(sol1.ensemblesol[1])
+
+@test u_predict≈u_real atol=1.5
+@test mean(u_predict .- u_real) < 0.1
+@test sol1.estimated_de_params[1]≈param atol=param * 0.3
+
+ts = vec(sol2.timepoints[1])
+u_real = [analytic_sol_func1(0.0, t) for t in ts]
+u_predict = pmean(sol2.ensemblesol[1])
+
+@test u_predict≈u_real atol=1.5
+@test mean(u_predict .- u_real) < 0.1
+@test sol1.estimated_de_params[1]≈param atol=param * 0.3
\ No newline at end of file
diff --git a/test/bpinnexperimental.jl b/test/bpinnexperimental.jl
index 3de049bf58..a8a389ad44 100644
--- a/test/bpinnexperimental.jl
+++ b/test/bpinnexperimental.jl
@@ -44,20 +44,32 @@ plot!(solution, labels = ["x" "y"])
 chain = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),
     Lux.Dense(6, 2))
 
-alg = BNNODE(chain;
-dataset = dataset,
-draw_samples = 1000,
-l2std = [0.1, 0.1],
-phystd = [0.1, 0.1],
-priorsNNw = (0.0, 3.0),
-param = [
-    Normal(1, 2),
-    Normal(2, 2),
-    Normal(2, 2),
-    Normal(0, 2)], progress = true)
-
-@time sol_pestim1 = solve(prob, alg; saveat = dt,)
-@time sol_pestim2 = solve(prob, alg; estim_collocate = true, saveat = dt)
+alg1 = BNNODE(chain;
+    dataset = dataset,
+    draw_samples = 1000,
+    l2std = [0.1, 0.1],
+    phystd = [0.1, 0.1],
+    priorsNNw = (0.0, 3.0),
+    param = [
+        Normal(1, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(0, 2)], progress = true)
+
+alg2 = BNNODE(chain;
+    dataset = dataset,
+    draw_samples = 1000,
+    l2std = [0.1, 0.1],
+    phystd = [0.1, 0.1],
+    priorsNNw = (0.0, 3.0),
+    param = [
+        Normal(1, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(0, 2)], estim_collocate = true, progress = true)
+
+@time sol_pestim1 = solve(prob, alg1; saveat = dt)
+@time sol_pestim2 = solve(prob, alg2; saveat = dt)
 plot(times, sol_pestim1.ensemblesol[1], label = "estimated x1")
 plot!(times, sol_pestim2.ensemblesol[1], label = "estimated x2")
 plot!(times, sol_pestim1.ensemblesol[2], label = "estimated y1")
@@ -66,28 +78,29 @@ plot!(times, sol_pestim2.ensemblesol[2], label = "estimated y2")
 # comparing it with the original solution
 plot!(solution, labels = ["true x" "true y"])
 
-@show sol_pestim1.estimated_ode_params
-@show sol_pestim2.estimated_ode_params
+@show sol_pestim1.estimated_de_params
+@show sol_pestim2.estimated_de_params
 
-function fitz(u, p , t)
+function fitz(u, p, t)
     v, w = u[1], u[2]
-    a,b,τinv,l = p[1], p[2], p[3], p[4]
-    
-    dv = v - 0.33*v^3 -w + l
-    dw = τinv*(v +  a - b*w)
+    a, b, τinv, l = p[1], p[2], p[3], p[4]
+
+    dv = v - 0.33 * v^3 - w + l
+    dw = τinv * (v + a - b * w)
 
     return [dv, dw]
 end
 
-prob_ode_fitzhughnagumo = ODEProblem(fitz, [1.0,1.0], (0.0,10.0), [0.7,0.8,1/12.5,0.5])
+prob_ode_fitzhughnagumo = ODEProblem(
+    fitz, [1.0, 1.0], (0.0, 10.0), [0.7, 0.8, 1 / 12.5, 0.5])
 dt = 0.5
 sol = solve(prob_ode_fitzhughnagumo, Tsit5(), saveat = dt)
 
 sig = 0.20
 data = Array(sol)
-dataset = [data[1,:] .+ (sig .* rand(length(sol.t))), data[2, :] .+ (sig .* rand(length(sol.t))), sol.t]
-priors = [Normal(0.5,1.0), Normal(0.5,1.0), Normal(0.0,0.5), Normal(0.5,1.0)]
-
+dataset = [data[1, :] .+ (sig .* rand(length(sol.t))),
+    data[2, :] .+ (sig .* rand(length(sol.t))), sol.t]
+priors = [Normal(0.5, 1.0), Normal(0.5, 1.0), Normal(0.0, 0.5), Normal(0.5, 1.0)]
 
 plot(sol.t, dataset[1], label = "noisy x")
 plot!(sol.t, dataset[2], label = "noisy y")
@@ -98,7 +111,7 @@ chain = Lux.Chain(Lux.Dense(1, 10, tanh), Lux.Dense(10, 10, tanh),
 
 Adaptorkwargs = (Adaptor = AdvancedHMC.StanHMCAdaptor,
     Metric = AdvancedHMC.DiagEuclideanMetric, targetacceptancerate = 0.8)
-alg = BNNODE(chain;
+alg1 = BNNODE(chain;
 dataset = dataset,
 draw_samples = 1000,
 l2std = [0.1, 0.1],
@@ -107,12 +120,21 @@ priorsNNw = (0.01, 3.0),
 Adaptorkwargs = Adaptorkwargs,
 param = priors, progress = true)
 
-@time sol_pestim3 = solve(prob_ode_fitzhughnagumo, alg; saveat = dt)
-@time sol_pestim4 = solve(prob_ode_fitzhughnagumo, alg; estim_collocate = true, saveat = dt)
+alg2 = BNNODE(chain;
+    dataset = dataset,
+    draw_samples = 1000,
+    l2std = [0.1, 0.1],
+    phystd = [0.1, 0.1],
+    priorsNNw = (0.01, 3.0),
+    Adaptorkwargs = Adaptorkwargs,
+    param = priors, estim_collocate = true, progress = true)
+
+@time sol_pestim3 = solve(prob_ode_fitzhughnagumo, alg1; saveat = dt)
+@time sol_pestim4 = solve(prob_ode_fitzhughnagumo, alg2; saveat = dt)
 plot!(sol.t, sol_pestim3.ensemblesol[1], label = "estimated x1")
 plot!(sol.t, sol_pestim4.ensemblesol[1], label = "estimated x2")
 plot!(sol.t, sol_pestim3.ensemblesol[2], label = "estimated y1")
 plot!(sol.t, sol_pestim4.ensemblesol[2], label = "estimated y2")
 
-@show sol_pestim3.estimated_ode_params
-@show sol_pestim4.estimated_ode_params
+@show sol_pestim3.estimated_de_params
+@show sol_pestim4.estimated_de_params

From 16e1b56d5243f0ff6c68414407c5f9b94043b42f Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Tue, 26 Mar 2024 20:50:40 +0530
Subject: [PATCH 032/107] merge conflict resolution

---
 src/NeuralPDE.jl        |  2 ++
 src/advancedHMC_MCMC.jl | 20 +++++++-------------
 src/collocated_estim.jl | 27 ---------------------------
 3 files changed, 9 insertions(+), 40 deletions(-)

diff --git a/src/NeuralPDE.jl b/src/NeuralPDE.jl
index 9bbdf5ce49..4799cf0bae 100644
--- a/src/NeuralPDE.jl
+++ b/src/NeuralPDE.jl
@@ -30,6 +30,7 @@ using DomainSets: Domain, ClosedInterval, AbstractInterval, leftendpoint, righte
 using SciMLBase: @add_kwonly, parameterless_type
 using UnPack: @unpack
 import ChainRulesCore, Lux, ComponentArrays
+using Lux: FromFluxAdaptor
 using ChainRulesCore: @non_differentiable
 
 RuntimeGeneratedFunctions.init(@__MODULE__)
@@ -51,6 +52,7 @@ include("neural_adapter.jl")
 include("advancedHMC_MCMC.jl")
 include("BPINN_ode.jl")
 include("PDE_BPINN.jl")
+include("dgm.jl")
 include("collocated_estim.jl")
 
 export NNODE, NNDAE,
diff --git a/src/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl
index 9064ddd9fa..7ca18bd58b 100644
--- a/src/advancedHMC_MCMC.jl
+++ b/src/advancedHMC_MCMC.jl
@@ -86,14 +86,6 @@ end
 vector_to_parameters(ps_new::AbstractVector, ps::AbstractVector) = ps_new
 
 function LogDensityProblems.logdensity(Tar::LogTargetDensity, θ)
-    # if Tar.estim_collocate
-    #     return physloglikelihood(Tar, θ) / length(Tar.dataset[1]) + priorweights(Tar, θ) +
-    #            L2LossData(Tar, θ) / length(Tar.dataset[1]) +
-    #            L2loss2(Tar, θ) / length(Tar.dataset[1])
-    # else
-    #     return physloglikelihood(Tar, θ) / length(Tar.dataset[1]) + priorweights(Tar, θ) +
-    #            L2LossData(Tar, θ) / length(Tar.dataset[1])
-    # end
     if Tar.estim_collocate
         return physloglikelihood(Tar, θ) + priorweights(Tar, θ) + L2LossData(Tar, θ) +
                L2loss2(Tar, θ)
@@ -455,7 +447,9 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain;
     MCMCkwargs = (n_leapfrog = 30,),
     progress = false, verbose = false,
     estim_collocate = false)
-    !(chain isa Lux.AbstractExplicitLayer) && (chain = Lux.transform(chain))
+
+    !(chain isa Lux.AbstractExplicitLayer) &&
+        (chain = adapt(FromFluxAdaptor(false, false), chain))
     # NN parameter prior mean and variance(PriorsNN must be a tuple)
     if isinplace(prob)
         throw(error("The BPINN ODE solver only supports out-of-place ODE definitions, i.e. du=f(u,p,t)."))
@@ -464,7 +458,7 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain;
     strategy = strategy == GridTraining ? strategy(physdt) : strategy
 
     if dataset != [nothing] &&
-       (length(dataset) < 2 || !(typeof(dataset) <: Vector{<:Vector{<:AbstractFloat}}))
+       (length(dataset) < 2 || !(dataset isa Vector{<:Vector{<:AbstractFloat}}))
         throw(error("Invalid dataset. dataset would be timeseries (x̂,t) where type: Vector{Vector{AbstractFloat}"))
     end
 
@@ -475,7 +469,7 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain;
     end
 
     if chain isa Lux.AbstractExplicitLayer
-        # Flux-vector, Lux-Named Tuple
+        # Lux-Named Tuple
         initial_nnθ, recon, st = generate_Tar(chain, init_params)
     else
         error("Only Lux.AbstractExplicitLayer Neural networks are supported")
@@ -553,7 +547,7 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain;
             MCMC_alg = kernelchoice(Kernel, MCMCkwargs)
             Kernel = AdvancedHMC.make_kernel(MCMC_alg, integrator)
             samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples, adaptor;
-                progress = progress, verbose = verbose, drop_warmup = true)
+                progress = progress, verbose = verbose)
 
             samplesc[i] = samples
             statsc[i] = stats
@@ -571,7 +565,7 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain;
         MCMC_alg = kernelchoice(Kernel, MCMCkwargs)
         Kernel = AdvancedHMC.make_kernel(MCMC_alg, integrator)
         samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples,
-            adaptor; progress = progress, verbose = verbose, drop_warmup = true)
+            adaptor; progress = progress, verbose = verbose)
 
         @info("Sampling Complete.")
         @info("Current Physics Log-likelihood : ", physloglikelihood(ℓπ, samples[end]))
diff --git a/src/collocated_estim.jl b/src/collocated_estim.jl
index a2f81b3ed9..3902f74a27 100644
--- a/src/collocated_estim.jl
+++ b/src/collocated_estim.jl
@@ -162,31 +162,4 @@ function calderivatives(prob, dataset)
     gradients = (A2 .- A1) ./ sqrt(eps(eltype(dataset[end][1])))
 
     return gradients
-end
-
-function calculate_derivatives(dataset)
-
-    # u = dataset[1]
-    # u1 = dataset[2]
-    # t = dataset[end]
-    # # control points
-    # n = Int(floor(length(t) / 10))
-    # # spline for datasetvalues(solution) 
-    # # interp = BSplineApprox(u, t, 4, 10, :Uniform, :Uniform)
-    # interp = CubicSpline(u, t)
-    # interp1 = CubicSpline(u1, t)
-    # # derivatives interpolation
-    # dx = t[2] - t[1]
-    # time = collect(t[1]:dx:t[end])
-    # smoothu = [interp(i) for i in time]
-    # smoothu1 = [interp1(i) for i in time]
-    # # derivative of the spline (must match function derivative) 
-    # û = tvdiff(smoothu, 20, 0.5, dx = dx, ε = 1)
-    # û1 = tvdiff(smoothu1, 20, 0.5, dx = dx, ε = 1)
-    # # tvdiff(smoothu, 100, 0.035, dx = dx, ε = 1)
-    # # FDM
-    # # û1 = diff(u) / dx
-    # # dataset[1] and smoothu are almost equal(rounding errors)
-    # return [û, û1] 
-
 end
\ No newline at end of file

From 9f694f855a3d64cc314c868be76953694d744fb8 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Tue, 26 Mar 2024 20:54:21 +0530
Subject: [PATCH 033/107] update NeuralPDE.jl, advancedHMC_MCMC.jl

---
 src/NeuralPDE.jl        | 1 -
 src/advancedHMC_MCMC.jl | 3 +--
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/NeuralPDE.jl b/src/NeuralPDE.jl
index 4799cf0bae..890f938309 100644
--- a/src/NeuralPDE.jl
+++ b/src/NeuralPDE.jl
@@ -53,7 +53,6 @@ include("advancedHMC_MCMC.jl")
 include("BPINN_ode.jl")
 include("PDE_BPINN.jl")
 include("dgm.jl")
-include("collocated_estim.jl")
 
 export NNODE, NNDAE,
     PhysicsInformedNN, discretize,
diff --git a/src/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl
index 7ca18bd58b..fdcdbdfcb4 100644
--- a/src/advancedHMC_MCMC.jl
+++ b/src/advancedHMC_MCMC.jl
@@ -448,8 +448,7 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain;
     progress = false, verbose = false,
     estim_collocate = false)
 
-    !(chain isa Lux.AbstractExplicitLayer) &&
-        (chain = adapt(FromFluxAdaptor(false, false), chain))
+    !(chain isa Lux.AbstractExplicitLayer) && (chain = adapt(FromFluxAdaptor(false, false), chain))
     # NN parameter prior mean and variance(PriorsNN must be a tuple)
     if isinplace(prob)
         throw(error("The BPINN ODE solver only supports out-of-place ODE definitions, i.e. du=f(u,p,t)."))

From a28d12f065a899ebff0eb9f36b833d1559ad947e Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Tue, 26 Mar 2024 20:55:24 +0530
Subject: [PATCH 034/107] update NeuralPDE.jl

---
 src/NeuralPDE.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/NeuralPDE.jl b/src/NeuralPDE.jl
index 890f938309..4799cf0bae 100644
--- a/src/NeuralPDE.jl
+++ b/src/NeuralPDE.jl
@@ -53,6 +53,7 @@ include("advancedHMC_MCMC.jl")
 include("BPINN_ode.jl")
 include("PDE_BPINN.jl")
 include("dgm.jl")
+include("collocated_estim.jl")
 
 export NNODE, NNDAE,
     PhysicsInformedNN, discretize,

From 03ee7b48b0c5f241bd2b59e1ec28181218783182 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Tue, 26 Mar 2024 21:00:38 +0530
Subject: [PATCH 035/107] update NeuralPDE.jl

---
 src/NeuralPDE.jl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/NeuralPDE.jl b/src/NeuralPDE.jl
index 4799cf0bae..890f938309 100644
--- a/src/NeuralPDE.jl
+++ b/src/NeuralPDE.jl
@@ -53,7 +53,6 @@ include("advancedHMC_MCMC.jl")
 include("BPINN_ode.jl")
 include("PDE_BPINN.jl")
 include("dgm.jl")
-include("collocated_estim.jl")
 
 export NNODE, NNDAE,
     PhysicsInformedNN, discretize,

From 27cfb56fb19fc487d4d4964da9dce69eb0376980 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Tue, 26 Mar 2024 21:01:57 +0530
Subject: [PATCH 036/107] update NeuralPDE.jl

---
 src/NeuralPDE.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/NeuralPDE.jl b/src/NeuralPDE.jl
index 890f938309..4799cf0bae 100644
--- a/src/NeuralPDE.jl
+++ b/src/NeuralPDE.jl
@@ -53,6 +53,7 @@ include("advancedHMC_MCMC.jl")
 include("BPINN_ode.jl")
 include("PDE_BPINN.jl")
 include("dgm.jl")
+include("collocated_estim.jl")
 
 export NNODE, NNDAE,
     PhysicsInformedNN, discretize,

From 6ef9d48a8bbd9313902e5fea0359e07ecc0ddb50 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Fri, 29 Mar 2024 22:02:18 +0530
Subject: [PATCH 037/107] pmean for tests

---
 src/BPINN_ode.jl              | 2 +-
 test/BPINN_PDEinvsol_tests.jl | 6 ++----
 test/BPINN_Tests.jl           | 8 ++++----
 3 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/src/BPINN_ode.jl b/src/BPINN_ode.jl
index 756696826d..ba699ee035 100644
--- a/src/BPINN_ode.jl
+++ b/src/BPINN_ode.jl
@@ -114,7 +114,7 @@ function BNNODE(chain, Kernel = HMC; strategy = nothing, draw_samples = 2000,
         Metric = DiagEuclideanMetric,
         targetacceptancerate = 0.8),
     Integratorkwargs = (Integrator = Leapfrog,),
-    numensemble = floor(Int, alg.draw_samples / 3),
+    numensemble = floor(Int, draw_samples / 3),
     estim_collocate = false,
     autodiff = false, progress = false, verbose = false)
     !(chain isa Lux.AbstractExplicitLayer) && (chain = adapt(FromFluxAdaptor(false, false), chain))
diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index 7e2356c5bc..748ff21686 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -216,8 +216,7 @@ sol1 = ahmc_bayesian_pinn_pde(pde_system,
     priorsNNw = (0.0, 1.0),
     saveats = [1 / 50.0],
     param = [LogNormal(6.0, 0.5)],
-    Dict_differentials = Dict_differentials,
-    progress = true)
+    Dict_differentials = Dict_differentials)
 
 sol2 = ahmc_bayesian_pinn_pde(pde_system,
     discretization;
@@ -226,8 +225,7 @@ sol2 = ahmc_bayesian_pinn_pde(pde_system,
     phystd = [0.01], l2std = [0.01],
     priorsNNw = (0.0, 1.0),
     saveats = [1 / 50.0],
-    param = [LogNormal(6.0, 0.5)],
-    progress = true)
+    param = [LogNormal(6.0, 0.5)])
 
 param = 2 * π
 ts = vec(sol1.timepoints[1])
diff --git a/test/BPINN_Tests.jl b/test/BPINN_Tests.jl
index 2fe347b3b4..6a32c560f0 100644
--- a/test/BPINN_Tests.jl
+++ b/test/BPINN_Tests.jl
@@ -53,8 +53,8 @@ Random.seed!(100)
     @test mean(abs.(physsol1 .- meanscurve)) < 0.005
 
     #--------------------- solve() call 
-    @test mean(abs.(x̂1 .- sol1lux.ensemblesol[1])) < 0.05
-    @test mean(abs.(physsol0_1 .- sol1lux.ensemblesol[1])) < 0.05
+    @test mean(abs.(x̂1 .- pmean(sol1lux.ensemblesol[1]))) < 0.025
+    @test mean(abs.(physsol0_1 .- pmean(sol1lux.ensemblesol[1]))) < 0.025
 end
 
 @testset "Example 2 - with parameter estimation" begin
@@ -122,7 +122,7 @@ end
     @test abs(p - mean([fhsamples[i][23] for i in 2000:length(fhsamples)])) < abs(0.35 * p)
 
     #-------------------------- solve() call  
-    @test mean(abs.(physsol1_1 .- sol2lux.ensemblesol[1])) < 8e-2
+    @test mean(abs.(physsol1_1 .- pmean(sol2lux.ensemblesol[1]))) < 8e-2
 
     # ESTIMATED ODE PARAMETERS (NN1 AND NN2)
     @test abs(p - sol2lux.estimated_de_params[1]) < abs(0.15 * p)
@@ -211,7 +211,7 @@ end
 
     #-------------------------- solve() call 
     # (lux chain)
-    @test mean(abs.(physsol2 .- sol3lux_pestim.ensemblesol[1])) < 0.15
+    @test mean(abs.(physsol2 .- pmean(sol3lux_pestim.ensemblesol[1]))) < 0.15
     # estimated parameters(lux chain)
     param1 = sol3lux_pestim.estimated_de_params[1]
     @test abs(param1 - p) < abs(0.45 * p)

From f8cf2da13eb044c210b2550f9db32097a44cac13 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Fri, 29 Mar 2024 22:11:25 +0530
Subject: [PATCH 038/107] .

---
 test/BPINN_PDEinvsol_tests.jl | 1113 ++++++++++++++++++++++++++++++++-
 1 file changed, 1112 insertions(+), 1 deletion(-)

diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index 748ff21686..1fadc8a0a5 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -242,4 +242,1115 @@ u_predict = pmean(sol2.ensemblesol[1])
 
 @test u_predict≈u_real atol=1.5
 @test mean(u_predict .- u_real) < 0.1
-@test sol1.estimated_de_params[1]≈param atol=param * 0.3
\ No newline at end of file
+@test sol1.estimated_de_params[1]≈param atol=param * 0.3
+
+
+
+println("Example 3: Lotka Volterra with New parameter estimation")
+@parameters t α β γ δ
+@variables x(..) y(..)
+
+Dt = Differential(t)
+eqs = [Dt(x(t))*α  ~ x(t) - β * x(t) * y(t), Dt(y(t))*γ  ~ δ * x(t) * y(t) - y(t)]
+bcs = [x(0) ~ 1.0, y(0) ~ 1.0]
+domains = [t ∈ Interval(0.0, 7.0)]
+
+# Define the parameters' values
+# params = [α => 1.0, β => 0.5, γ => 0.5, δ => 1.0]
+# p = [1.5, 1.0, 3.0, 1.0]
+
+chainl = [
+    Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),Lux.Dense(6, 1)),
+    Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),Lux.Dense(6, 1))
+]
+
+initl, st = Lux.setup(Random.default_rng(), chainl[1])
+initl1, st1 = Lux.setup(Random.default_rng(), chainl[2])
+
+using NeuralPDE, Lux, OrdinaryDiffEq, Distributions, Random
+
+function lotka_volterra(u, p, t)
+    # Model parameters. 
+    α, β, γ, δ = p
+    # Current state.
+    x, y = u
+
+    # Evaluate differential equations.
+    dx = (α - β * y) * x # prey
+    dy = (δ * x - γ) * y # predator
+
+    return [dx, dy]
+end
+# initial-value problem.
+u0 = [1.0, 1.0]
+# p = [2/3, 2/3, 1/3.0, 1/3.0]
+p = [1.5, 1.0, 3.0, 1.0]
+tspan = (0.0, 7.0)
+prob = ODEProblem(lotka_volterra, u0, tspan, p)
+dt = 0.01
+solution = solve(prob, Tsit5(); saveat = dt)
+
+
+# function moving_average_smoothing(data::Vector{T}, window_size::Int) where {T}
+#     smoothed_data = similar(data, T, length(data))
+
+#     for i in 1:length(data)
+#         start_idx = max(1, i - window_size)
+#         end_idx = min(length(data), i + window_size)
+#         smoothed_data[i] = mean(data[start_idx:end_idx])
+#     end
+
+#     return smoothed_data'
+# end
+
+# Extract solution
+time = solution.t
+u = hcat(solution.u...)
+time1=solution.t
+u_noisy = u .+ u .* (0.3 .* randn(size(u)))
+
+plot(time,u[1,:])
+plot!(time,u[2,:])
+scatter!(time1,u_noisy[1,:])
+scatter!(time1,u_noisy[2,:])
+
+# window_size = 5
+# smoothed_datasets = [moving_average_smoothing(u1[i, :], window_size)
+#                      for i in 1:length(solution.u[1])]
+# u2 = vcat(smoothed_datasets[1], smoothed_datasets[2])
+# Randomly select some points from the solution
+num_points = 150  # Number of points to select
+selected_indices = rand(1:size(u_noisy, 2), num_points)
+upoints = [u_noisy[:, i] for i in selected_indices]
+timepoints = [time[i] for i in selected_indices]
+temp=hcat(upoints...)
+dataset = [hcat(temp[i, :], timepoints) for i in 1:2]
+
+# plot(time,u[1,:])
+# plot!(time,u[2,:])
+
+discretization = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
+    dataset = [dataset, nothing])
+
+@named pde_system = PDESystem(eqs,
+    bcs,
+    domains,
+    [t],
+    [x(t), y(t)],
+    [α, β, γ, δ],
+    defaults = Dict([α =>2, β => 3, γ =>3, δ =>2]))
+
+# creating dictionary for masking equations
+eqs = pde_system.eqs
+Dict_differentials = Dict()
+exps = toexpr.(eqs)
+nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+sol3 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], progress = true)
+
+# time
+# dataset
+# chainl[1](time', sol3.estimated_nn_params[1], st)[1][1,:]
+# plot!(time1, chainl[1](time1', sol3.estimated_nn_params[1], st)[1][1,:])
+# plot!(time1, chainl[2](time1', sol3.estimated_nn_params[2], st)[1][1,:])
+# plot!(time1, chainl[1](time1', sol5.estimated_nn_params[1], st)[1][1,:])
+# plot!(time1, chainl[2](time1', sol5.estimated_nn_params[2], st)[1][1,:])
+# time1 = collect(0.0:(1 / 100.0):8.0)
+
+sol4 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], progress = true
+)
+
+
+sol5_00 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.15, 0.15],
+    phystd = [0.15, 0.15], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol5_0 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.05, 0.05],
+    phystd = [0.05, 0.05], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.1, 0.1],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol5 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.2, 0.2],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+# 100 points(sol5_2 vs sol3)
+sol5_2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.1, 0.1],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+# 100 points(sol5_2 vs sol3)
+sol5_2_1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.08, 0.08],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+# 100 points(sol5_2 vs sol3)
+sol5_2_2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.2, 0.2],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+# 50 datapoint 0-5 sol5 vs sol4
+# julia> sol4.estimated_de_params
+# 4-element Vector{Particles{Float64, 234}}:
+#  0.549 ± 0.0058
+#  0.71 ± 0.0042
+#  0.408 ± 0.0063
+#  0.355 ± 0.0015
+
+# julia> sol5.estimated_de_params
+# 4-element Vector{Particles{Float64, 234}}:
+#  0.604 ± 0.0052
+#  0.702 ± 0.0034
+#  0.346 ± 0.0037
+#  0.335 ± 0.0013
+
+# 100 datapoint 0-5 sol5_2 vs sol3
+# julia> sol3.estimated_de_params
+# 4-element Vector{Particles{Float64, 234}}:
+#  0.598 ± 0.0037
+#  0.711 ± 0.0027
+#  0.399 ± 0.0032
+#  0.333 ± 0.0011
+
+# julia> sol5_2.estimated_de_params
+# 4-element Vector{Particles{Float64, 234}}:
+#  0.604 ± 0.0035
+#  0.686 ± 0.0026
+#  0.395 ± 0.0029
+#  0.328 ± 0.00095
+
+# timespan for full dataset (0-8)
+sol6 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1, 2),
+        Normal(1, 1),
+        Normal(1, 2),
+        Normal(1, 1)
+    ], progress = true)
+
+sol5_3 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1, 2),
+        Normal(1, 1),
+        Normal(1, 2),
+        Normal(1, 1)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol5_4 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.2, 0.2],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1, 2),
+        Normal(1, 1),
+        Normal(1, 2),
+        Normal(1, 1)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol5_5 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1, 2),
+        Normal(1, 1),
+        Normal(1, 2),
+        Normal(1, 1)
+    ], progress = true
+)
+
+sol7 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1, 2),
+        Normal(1, 1),
+        Normal(1, 2),
+        Normal(1, 1)
+    ], Dict_differentials = Dict_differentials, progress = true)
+
+sol5_5_1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], progress = true
+)
+
+sol7_1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true)
+    
+sol7_2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.1, 0.1],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true)
+
+sol7_3 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.2, 0.2],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true)
+ 
+sol7_4 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true)
+
+lpfun = function f(chain::Chains) # function to compute the logpdf values
+    niter, nparams, nchains = size(chain)
+    lp = zeros(niter + nchains) # resulting logpdf values
+    for i = 1:nparams
+        lp += logpdf(MvNormal(Array(chain[:,i,:])) , dataset[1][:,1]')
+        lp += logpdf(MvNormal(Array(chain[:,i,:])) , dataset[1][:,2]')
+    end
+    return lp
+end
+
+DIC, pD = dic(sol3.original.mcmc_chain, lpfun)
+DIC1, pD1 = dic(sol4.original.mcmc_chain, lpfun)
+
+size(sol3.original.mcmc_chain)
+Array(sol3.original.mcmc_chain[1,:,:])
+length(sol3.estimated_nn_params[1])
+chainl[1](time', sol3.estimated_nn_params[1], st)[1]
+
+data = [hcat(calculate_derivatives2(dataset[i][:, 2], dataset[1][:, 1]),dataset[i][:, 2]) for i in eachindex(dataset)]
+dataset[1][:,1]
+dataset[2]
+plot!(dataset[1][:,2],dataset[1][:,1])
+eqs
+sol5 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 200,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.02, 0.02],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(3, 2),
+        Normal(3, 2)
+        # LogNormal(1, 2),
+        # LogNormal(1, 2),
+        # LogNormal(1, 2),
+        # LogNormal(1, 2)
+    ], progress = true)
+
+# plot(time, chainl[1](time', sol2.estimated_nn_params[1], st)[1])
+# plot!(time, chainl[2](time', sol2.estimated_nn_params[2], st)[1])
+
+sol6 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 200,
+    bcstd = [0.5, 0.5],
+    phystd = [0.5, 0.5], l2std = [0.02, 0.02],
+    priorsNNw = (0.0, 5.0), phystdnew = [0.5, 0.5],
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),aa
+    param = [
+        # LogNormal(2, 3),LogNormal(2, 3),LogNormal(2, 3),LogNormal(2, 3)
+        # Normal(3, 2),
+        # Normal(4, 2),
+        Normal(3, 2),
+        Normal(3, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+function calculate_derivatives2(indvar,depvar)
+    x̂, time = indvar,depvar
+    num_points = length(x̂)
+    # Initialize an array to store the derivative values.
+    derivatives = similar(x̂)
+
+    for i in 2:(num_points - 1)
+        # Calculate the first-order derivative using central differences.
+        Δt_forward = time[i + 1] - time[i]
+        Δt_backward = time[i] - time[i - 1]
+
+        derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
+
+        derivatives[i] = derivative
+    end
+
+    # Derivatives at the endpoints can be calculated using forward or backward differences.
+    derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
+    derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
+    return derivatives
+end
+dataset[1]
+dataset[2]
+dataset[1][:,1]=calculate_derivatives2(dataset[1][:,2], dataset[1][:,1])
+dataset[2][:,1]=calculate_derivatives2(dataset[2][:,2], dataset[2][:,1])
+dataset[1]
+dataset[2]
+sol7 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 200,
+    bcstd = [0.5, 0.5],
+    phystd = [0.5, 0.5], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(0, 2),
+        Normal(0, 2)
+        # LogNormal(1, 2),
+        # LogNormal(1, 2),
+        # LogNormal(1, 2),
+        # LogNormal(1, 2)
+    ], progress = true)
+
+# plot(time, chainl[1](time', sol2.estimated_nn_params[1], st)[1])
+# plot!(time, chainl[2](time', sol2.estimated_nn_params[2], st)[1])
+
+sol8 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), phystdnew = [0.1, 0.1],
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),aa
+    param = [
+        # LogNormal(2, 3),LogNormal(2, 3),LogNormal(2, 3),LogNormal(2, 3)
+        # Normal(3, 2),
+        # Normal(4, 2),
+        Normal(0, 2),
+        Normal(0, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+timepoints = collect(0.0:(1 / 100.0):9.0)
+plot!(timepoints', chainl[1](timepoints', sol5_4.estimated_nn_params[1], st)[1])
+plot!(timepoints, chainl[2](timepoints', sol5_4.estimated_nn_params[2], st)[1])
+
+using Plots, StatsPlots
+plotly()
+
+plot(time, u[1, :])
+plot!(time, u[2, :])
+scatter!(time, u_noisy[1, :])
+scatter!(time, u_noisy[2, :])
+scatter!(discretization.dataset[1][1][:,2], discretization.dataset[1][1][:,1])
+scatter!(discretization.dataset[1][2][:,2], discretization.dataset[1][2][:,1])
+
+# plot28(sol4 seems better vs sol3 plots, params seems similar)
+plot!(sol3.timepoints[1]', sol3.ensemblesol[1],legend=nothing)
+plot!(sol3.timepoints[2]', sol3.ensemblesol[2])
+plot!(sol4.timepoints[1]', sol4.ensemblesol[1])
+plot!(sol4.timepoints[2]', sol4.ensemblesol[2])
+
+plot!(sol4_2.timepoints[1]', sol4_2.ensemblesol[1],legend=nothing)
+plot!(sol4_2.timepoints[2]', sol4_2.ensemblesol[2])
+plot!(sol5_2.timepoints[1]', sol5_2.ensemblesol[1],legend=nothing)
+plot!(sol5_2.timepoints[2]', sol5_2.ensemblesol[2])
+
+plot!(sol4_3.timepoints[1]', sol4_3.ensemblesol[1],legend=nothing)
+plot!(sol4_3.timepoints[2]', sol4_3.ensemblesol[2])
+plot!(sol5_3.timepoints[1]', sol5_3.ensemblesol[1])
+plot!(sol5_3.timepoints[2]', sol5_3.ensemblesol[2])
+plot!(sol5_4.timepoints[1]', sol5_4.ensemblesol[1],legend=nothing)
+plot!(sol5_4.timepoints[2]', sol5_4.ensemblesol[2])
+
+
+# plot 36 sol4 vs sol5(params sol4 better, but plots sol5 "looks" better),plot 44(sol5 better than sol6 overall)
+plot!(sol5.timepoints[1]', sol5.ensemblesol[1],legend=nothing)
+plot!(sol5.timepoints[2]', sol5.ensemblesol[2])
+plot!(sol6.timepoints[1]', sol6.ensemblesol[1])
+plot!(sol6.timepoints[2]', sol6.ensemblesol[2])
+
+# plot52 sol7 vs sol5(sol5 overall better plots, params?)
+plot!(sol7.timepoints[1]', sol7.ensemblesol[1])
+plot!(sol7.timepoints[2]', sol7.ensemblesol[2])
+
+# sol8,sol8_2,sol9,sol9_2 bad
+plot!(sol8.timepoints[1]', sol8.ensemblesol[1])
+plot!(sol8.timepoints[2]', sol8.ensemblesol[2])
+plot!(sol8_2.timepoints[1]', sol8_2.ensemblesol[1])
+plot!(sol8_2.timepoints[2]', sol8_2.ensemblesol[2])
+
+plot!(sol9.timepoints[1]', sol9.ensemblesol[1])
+plot!(sol9.timepoints[2]', sol9.ensemblesol[2])
+plot!(sol9_2.timepoints[1]', sol9_2.ensemblesol[1])
+plot!(sol9_2.timepoints[2]', sol9_2.ensemblesol[2])
+
+
+plot!(sol5_5.timepoints[1]', sol5_5.ensemblesol[1])
+plot!(sol5_5.timepoints[2]', sol5_5.ensemblesol[2],legend=nothing)
+
+plot!(sol5_5_1.timepoints[1]', sol5_5_1.ensemblesol[1])
+plot!(sol5_5_1.timepoints[2]', sol5_5_1.ensemblesol[2],legend=nothing)
+plot!(sol7_1.timepoints[1]', sol7_1.ensemblesol[1])
+plot!(sol7_1.timepoints[2]', sol7_1.ensemblesol[2])
+
+plot!(sol7_4.timepoints[1]', sol7_4.ensemblesol[1])
+plot!(sol7_4.timepoints[2]', sol7_4.ensemblesol[2])
+
+plot!(sol5_2_1.timepoints[1]', sol5_2_1.ensemblesol[1],legend=nothing)
+plot!(sol5_2_1.timepoints[2]', sol5_2_1.ensemblesol[2])
+plot!(sol5_2_2.timepoints[1]', sol5_2_2.ensemblesol[1],legend=nothing)
+plot!(sol5_2_2.timepoints[2]', sol5_2_2.ensemblesol[2])
+
+plot!(sol5_0.timepoints[1]', sol5_0.ensemblesol[1])
+plot!(sol5_0.timepoints[2]', sol5_0.ensemblesol[2],legend=nothing)
+
+plot!(sol5_00.timepoints[1]', sol5_00.ensemblesol[1])
+plot!(sol5_00.timepoints[2]', sol5_00.ensemblesol[2],legend=nothing)
+
+# test with lower number of points
+# test same calls 2 times or more
+# consider full range dataset case
+# combination of all above
+
+# run 1 100 iters
+sol5.estimated_de_params
+sol6.estimated_de_params
+
+# run 2 200 iters
+sol5.estimated_de_params
+sol6.estimated_de_params
+
+# run 2 200 iters
+sol3.estimated_de_params
+sol4.estimated_de_params
+
+# p = [2/3, 2/3, 1/3, 1/3]
+sol3.estimated_de_params
+sol4.estimated_de_params
+dataset[1]
+eqs
+α, β, γ, δ = p
+p
+#  1.0
+#  0.6666666666666666
+#  1.0
+#  0.33333333333333333
+
+1/a
+1/c
+eqs
+using StatsPlots
+plotly()
+plot(sol3.original.mcmc_chain)
+plot(sol4.original.mcmc_chain)
+
+# 4-element Vector{Particles{Float64, 34}}:
+#  1.23 ± 0.022
+#  0.858 ± 0.011
+#  3.04 ± 0.079
+#  1.03 ± 0.024
+# 4-element Vector{Particles{Float64, 34}}:
+#  1.2 ± 0.0069
+#  0.835 ± 0.006
+#  3.22 ± 0.01
+#  1.08 ± 0.0053
+# # plot(time', chainl[1](time', sol1.estimated_nn_params[1], st)[1])
+# # plot!(time, chainl[2](time', sol1.estimated_nn_params[2], st)[1])
+
+# sol3 = ahmc_bayesian_pinn_pde(pde_system,
+#     discretization;
+#     draw_samples = 500,
+#     bcstd = [0.05, 0.05],
+#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
+#     phystdnew = [0.5, 0.5],
+#     #  Kernel = AdvancedHMC.NUTS(0.8),
+#     priorsNNw = (0.0, 10.0),
+#     saveats = [1 / 50.0],
+#     param = [
+#         Normal(0.0, 2),
+#         Normal(0.0, 2),
+#         Normal(0.0, 2),
+#         Normal(0.0, 2)
+#     ],
+#     Dict_differentials = Dict_differentials, progress = true)
+
+# sol = ahmc_bayesian_pinn_pde(pde_system,
+#     discretization;
+#     draw_samples = 500,
+#     bcstd = [0.05, 0.05],
+#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
+#     priorsNNw = (0.0, 10.0),
+#     saveats = [1 / 50.0],
+#     # Kernel = AdvancedHMC.NUTS(0.8),
+#     param = [
+#         Normal(1.0, 2),
+#         Normal(1.0, 2),
+#         Normal(1.0, 2),
+#         Normal(1.0, 2)
+#     ], progress = true)
+
+# plot!(sol.timepoints[1]', sol.ensemblesol[1])
+# plot!(sol.timepoints[2]', sol.ensemblesol[2])
+
+# sol1 = ahmc_bayesian_pinn_pde(pde_system,
+#     discretization;
+#     draw_samples = 500,
+#     bcstd = [0.05, 0.05],
+#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
+#     phystdnew = [0.5, 0.5],
+#     #  Kernel = AdvancedHMC.NUTS(0.8),
+#     priorsNNw = (0.0, 10.0),
+#     saveats = [1 / 50.0],
+#     param = [
+#         Normal(1.0, 2),
+#         Normal(1.0, 2),
+#         Normal(1.0, 2),
+#         Normal(1.0, 2)
+#     ],
+#     Dict_differentials = Dict_differentials, progress = true)
+
+# plot!(sol1.timepoints[1]', sol1.ensemblesol[1])
+# plot!(sol1.timepoints[2]', sol1.ensemblesol[2])
+
+sol = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 500,
+    bcstd = [0.05, 0.05],
+    phystd = [0.005, 0.005], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 10.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(1.0, 2),
+        Normal(1.0, 2),
+        Normal(1.0, 2),
+        Normal(1.0, 2)
+    ])
+
+# plot!(sol.timepoints[1]', sol.ensemblesol[1])
+# plot!(sol.timepoints[2]', sol.ensemblesol[2])
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 500,
+    bcstd = [0.05, 0.05],
+    phystd = [0.005, 0.005], l2std = [0.1, 0.1],
+    phystdnew = [0.5, 0.5],
+    #  Kernel = AdvancedHMC.NUTS(0.8),
+    priorsNNw = (0.0, 10.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1.0, 2),
+        Normal(1.0, 2),
+        Normal(1.0, 2),
+        Normal(1.0, 2)
+    ],
+    Dict_differentials = Dict_differentials)
+
+param = 2 * π
+ts = vec(sol1.timepoints[1])
+u_real = [analytic_sol_func1(0.0, t) for t in ts]
+u_predict = pmean(sol1.ensemblesol[1])
+
+@test u_predict≈u_real atol=1.5
+@test mean(u_predict .- u_real) < 0.1
+@test sol1.estimated_de_params[1]≈param atol=param * 0.3
+
+# points1 = []
+# for eq_arg in eq_args
+#     a = []
+#     # for each (depvar,[indvar1..]) if indvari==indvar (eq_arg)
+#     for i in eachindex(symbols_input)
+#         if symbols_input[i][2] == eq_arg
+#             # include domain points of that depvar
+#             # each loss equation take domain matrix [points..;points..]
+#             push!(a, train_sets[i][:, 2:end]')
+#         end
+#     end
+#     # vcat as new row for next equation
+#     push!(points1, vcat(a...))
+# end
+# println(points1 == points)
+
+# using NeuralPDE, Flux, Lux, ModelingToolkit, LinearAlgebra, AdvancedHMC
+# import ModelingToolkit: Interval, infimum, supremum, Distributions
+# using Plots, MonteCarloMeasurements
+
+# @parameters x, t, α
+# @variables u(..)
+# Dt = Differential(t)
+# Dx = Differential(x)
+# Dx2 = Differential(x)^2
+# Dx3 = Differential(x)^3
+# Dx4 = Differential(x)^4
+
+# # α = 1
+# β = 4
+# γ = 1
+# eq = Dt(u(x, t)) + u(x, t) * Dx(u(x, t)) + α * Dx2(u(x, t)) + β * Dx3(u(x, t)) + γ * Dx4(u(x, t)) ~ 0
+
+# u_analytic(x, t; z = -x / 2 + t) = 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
+# du(x, t; z = -x / 2 + t) = 15 / 2 * (tanh(z) + 1) * (3 * tanh(z) - 1) * sech(z)^2
+
+# bcs = [u(x, 0) ~ u_analytic(x, 0),
+#     u(-10, t) ~ u_analytic(-10, t),
+#     u(10, t) ~ u_analytic(10, t),
+#     Dx(u(-10, t)) ~ du(-10, t),
+#     Dx(u(10, t)) ~ du(10, t)]
+
+# # Space and time domains
+# domains = [x ∈ Interval(-10.0, 10.0),
+#     t ∈ Interval(0.0, 1.0)]
+
+# # Discretization
+# dx = 0.4;
+# dt = 0.2;
+
+# # Function to compute analytical solution at a specific point (x, t)
+# function u_analytic_point(x, t)
+#     z = -x / 2 + t
+#     return 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
+# end
+
+# # Function to generate the dataset matrix
+# function generate_dataset_matrix(domains, dx, dt)
+#     x_values = -10:dx:10
+#     t_values = 0.0:dt:1.0
+
+#     dataset = []
+
+#     for t in t_values
+#         for x in x_values
+#             u_value = u_analytic_point(x, t)
+#             push!(dataset, [u_value, x, t])
+#         end
+#     end
+
+#     return vcat([data' for data in dataset]...)
+# end
+
+# datasetpde = [generate_dataset_matrix(domains, dx, dt)]
+
+# # noise to dataset
+# noisydataset = deepcopy(datasetpde)
+# noisydataset[1][:, 1] = noisydataset[1][:, 1] .+
+#                         randn(size(noisydataset[1][:, 1])) .* 5 / 100 .*
+#                         noisydataset[1][:, 1]
+
+# # plot(datasetpde[1][:, 2], datasetpde[1][:, 1], title = "Dataset from Analytical Solution")
+# # plot!(noisydataset[1][:, 2], noisydataset[1][:, 1])
+
+# # Neural network
+# chain = Lux.Chain(Lux.Dense(2, 8, Lux.tanh),
+#     Lux.Dense(8, 8, Lux.tanh),
+#     Lux.Dense(8, 1))
+
+# discretization = NeuralPDE.BayesianPINN([chain],
+#     GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset, nothing])
+
+# @named pde_system = PDESystem(eq,
+#     bcs,
+#     domains,
+#     [x, t],
+#     [u(x, t)],
+#     [α],
+#     defaults = Dict([α => 0.5]))
+
+# sol1 = ahmc_bayesian_pinn_pde(pde_system,
+#     discretization;
+#     draw_samples = 100,
+#     bcstd = [0.2, 0.2, 0.2, 0.2, 0.2],
+#     phystd = [1.0], l2std = [0.05], param = [Distributions.LogNormal(0.5, 2)],
+#     priorsNNw = (0.0, 10.0),
+#     saveats = [1 / 100.0, 1 / 100.0], progress = true)
+
+# eqs = pde_system.eqs
+# Dict_differentials = Dict()
+# exps = toexpr.(eqs)
+# nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+# sol2 = ahmc_bayesian_pinn_pde(pde_system,
+#     discretization;
+#     draw_samples = 100,
+#     bcstd = [0.2, 0.2, 0.2, 0.2, 0.2],
+#     phystd = [1.0], phystdnew = [0.05], l2std = [0.05],
+#     param = [Distributions.LogNormal(0.5, 2)],
+#     priorsNNw = (0.0, 10.0),
+#     saveats = [1 / 100.0, 1 / 100.0], Dict_differentials = Dict_differentials,
+#     progress = true)
+
+# phi = discretization.phi[1]
+# xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+#           for (d, dx) in zip(domains, [dx / 10, dt])]
+# u_predict = [[first(pmean(phi([x, t], sol1.estimated_nn_params[1]))) for x in xs]
+#              for t in ts]
+# u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+# diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1.estimated_nn_params[1]))))
+#            for x in xs]
+#           for t in ts]
+
+# # p1 = plot(xs, u_predict, title = "predict")
+# # p2 = plot(xs, u_real, title = "analytic")
+# # p3 = plot(xs, diff_u, title = "error")
+# # plot(p1, p2, p3)
+
+# phi = discretization.phi[1]
+# xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+#           for (d, dx) in zip(domains, [dx / 10, dt])]
+# u_predict = [[first(pmean(phi([x, t], sol2.estimated_nn_params[1]))) for x in xs]
+#              for t in ts]
+# u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+# diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol2.estimated_nn_params[1]))))
+#            for x in xs]
+#           for t in ts]
+
+# # p1 = plot(xs, u_predict, title = "predict")
+# # p2 = plot(xs, u_real, title = "analytic")
+# # p3 = plot(xs, diff_u, title = "error")
+# # plot(p1, p2, p3)
+
+@parameters t, p
+@variables u(..)
+
+Dt = Differential(t)
+eqs = Dt(u(t)) - cos(p * t) ~ 0
+bcs = [u(0) ~ 0.0]
+domains = [t ∈ Interval(0.0, 2.0)]
+
+chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
+initl, st = Lux.setup(Random.default_rng(), chainl)
+
+@named pde_system = PDESystem(eqs,
+    bcs,
+    domains,
+    [t],
+    [u(t)],
+    [p],
+    defaults = Dict([p => 4.0]))
+
+analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
+timepoints = collect(0.0:(1 / 100.0):2.0)
+u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
+u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
+dataset = [hcat(u1, timepoints)]
+
+discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
+    dataset = [dataset, nothing])
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 1500,
+    bcstd = [0.05],
+    phystd = [0.01], l2std = [0.01],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 50.0],
+    param = [LogNormal(4.0, 2)], progress = true)
+
+param = 2 * π
+ts = vec(sol1.timepoints[1])
+u_real = [analytic_sol_func1(0.0, t) for t in ts]
+u_predict = pmean(sol1.ensemblesol[1])
+
+@test u_predict≈u_real atol=0.1
+@test mean(u_predict .- u_real) < 0.01
+@test sol1.estimated_de_params[1]≈param atol=0.1
+sol1.estimated_de_params[1]
+
+eqs = pde_system.eqs
+Dict_differentials = Dict()
+exps = toexpr.(eqs)
+nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+sol2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 1500,
+    bcstd = [0.05],
+    phystd = [0.01], l2std = [0.02], phystdnew = [0.02],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 50.0],
+    param = [LogNormal(4.0, 2)],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+param = 2 * π
+ts_2 = vec(sol2.timepoints[1])
+u_real_2 = [analytic_sol_func1(0.0, t) for t in ts]
+u_predict_2 = pmean(sol2.ensemblesol[1])
+
+@test u_predict_2≈u_real_2 atol=0.1
+@test mean(u_predict_2 .- u_real_2) < 0.01
+@test sol2.estimated_de_params[1]≈param atol=0.1
+sol2.estimated_de_params[1]
+
+plot(ts_2, u_predict_2)
+plot!(ts_2, u_real_2)
+
+@parameters t, σ_
+@variables x(..), y(..), z(..)
+Dt = Differential(t)
+eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
+    Dt(y(t)) ~ x(t) * (28.0 - z(t)) - y(t),
+    Dt(z(t)) ~ x(t) * y(t) - 8 / 3 * z(t)]
+
+bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
+domains = [t ∈ Interval(0.0, 1.0)]
+
+input_ = length(domains)
+n = 7
+chain = [
+    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+        Lux.Dense(n, 1)),
+    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+        Lux.Dense(n, 1)),
+    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+        Lux.Dense(n, 1))
+]
+
+#Generate Data
+function lorenz!(du, u, p, t)
+    du[1] = 10.0 * (u[2] - u[1])
+    du[2] = u[1] * (28.0 - u[3]) - u[2]
+    du[3] = u[1] * u[2] - (8 / 3) * u[3]
+end
+
+u0 = [1.0; 0.0; 0.0]
+tspan = (0.0, 1.0)
+prob = ODEProblem(lorenz!, u0, tspan)
+sol = solve(prob, Tsit5(), dt = 0.01, saveat = 0.05)
+ts = sol.t
+us = hcat(sol.u...)
+us = us .+ ((0.05 .* randn(size(us))) .* us)
+ts_ = hcat(sol(ts).t...)[1, :]
+dataset = [hcat(us[i, :], ts_) for i in 1:3]
+
+discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true,
+    dataset = [dataset, nothing])
+
+@named pde_system = PDESystem(eqs, bcs, domains,
+    [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]]))
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 100,
+    bcstd = [0.3, 0.3, 0.3],
+    phystd = [0.1, 0.1, 0.1],
+    l2std = [1, 1, 1],
+    priorsNNw = (0.0, 1.0),
+    saveats = [0.01],
+    param = [Normal(14.0, 2)], progress = true)
+
+idealp = 10.0
+p_ = sol1.estimated_de_params[1]
+@test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
+# @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
+
+@parameters x y
+@variables u(..)
+Dxx = Differential(x)^2
+Dyy = Differential(y)^2
+
+# 2D PDE
+eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y)
+
+# Boundary conditions
+bcs = [u(0, y) ~ 0.0, u(1, y) ~ 0.0,
+    u(x, 0) ~ 0.0, u(x, 1) ~ 0.0]
+
+# Space and time domains
+domains = [x ∈ Interval(0.0, 1.0),
+    y ∈ Interval(0.0, 1.0)]
+
+# Neural network
+dim = 2 # number of dimensions
+chain = Lux.Chain(Lux.Dense(dim, 9, Lux.σ), Lux.Dense(9, 9, Lux.σ), Lux.Dense(9, 1))
+
+# Discretization
+dx = 0.04
+discretization = BayesianPINN([chain], GridTraining(dx), dataset = [[dataset], nothing])
+
+@named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
+
+eqs = pde_system.eqs
+Dict_differentials = Dict()
+exps = toexpr.(eqs)
+nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 5,
+    bcstd = [0.01, 0.01, 0.01, 0.01],
+    phystd = [0.005],
+    priorsNNw = (0.0, 2.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+xs = sol1.timepoints[1]
+sol1.ensemblesol[1]
+analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2)
+
+dataset = hcat(u_real, xs')
+u_predict = pmean(sol1.ensemblesol[1])
+u_real = [analytic_sol_func(xs[:, i][1], xs[:, i][2]) for i in 1:length(xs[1, :])]
+@test u_predict≈u_real atol=0.8
\ No newline at end of file

From 3e96e3db065e0d618996efc1c4e6318febdf78da Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Fri, 29 Mar 2024 22:25:27 +0530
Subject: [PATCH 039/107] update BPINN_PDEinvsol_tests.jl

---
 test/BPINN_PDEinvsol_tests.jl | 1113 +--------------------------------
 1 file changed, 1 insertion(+), 1112 deletions(-)

diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index 1fadc8a0a5..748ff21686 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -242,1115 +242,4 @@ u_predict = pmean(sol2.ensemblesol[1])
 
 @test u_predict≈u_real atol=1.5
 @test mean(u_predict .- u_real) < 0.1
-@test sol1.estimated_de_params[1]≈param atol=param * 0.3
-
-
-
-println("Example 3: Lotka Volterra with New parameter estimation")
-@parameters t α β γ δ
-@variables x(..) y(..)
-
-Dt = Differential(t)
-eqs = [Dt(x(t))*α  ~ x(t) - β * x(t) * y(t), Dt(y(t))*γ  ~ δ * x(t) * y(t) - y(t)]
-bcs = [x(0) ~ 1.0, y(0) ~ 1.0]
-domains = [t ∈ Interval(0.0, 7.0)]
-
-# Define the parameters' values
-# params = [α => 1.0, β => 0.5, γ => 0.5, δ => 1.0]
-# p = [1.5, 1.0, 3.0, 1.0]
-
-chainl = [
-    Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),Lux.Dense(6, 1)),
-    Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),Lux.Dense(6, 1))
-]
-
-initl, st = Lux.setup(Random.default_rng(), chainl[1])
-initl1, st1 = Lux.setup(Random.default_rng(), chainl[2])
-
-using NeuralPDE, Lux, OrdinaryDiffEq, Distributions, Random
-
-function lotka_volterra(u, p, t)
-    # Model parameters. 
-    α, β, γ, δ = p
-    # Current state.
-    x, y = u
-
-    # Evaluate differential equations.
-    dx = (α - β * y) * x # prey
-    dy = (δ * x - γ) * y # predator
-
-    return [dx, dy]
-end
-# initial-value problem.
-u0 = [1.0, 1.0]
-# p = [2/3, 2/3, 1/3.0, 1/3.0]
-p = [1.5, 1.0, 3.0, 1.0]
-tspan = (0.0, 7.0)
-prob = ODEProblem(lotka_volterra, u0, tspan, p)
-dt = 0.01
-solution = solve(prob, Tsit5(); saveat = dt)
-
-
-# function moving_average_smoothing(data::Vector{T}, window_size::Int) where {T}
-#     smoothed_data = similar(data, T, length(data))
-
-#     for i in 1:length(data)
-#         start_idx = max(1, i - window_size)
-#         end_idx = min(length(data), i + window_size)
-#         smoothed_data[i] = mean(data[start_idx:end_idx])
-#     end
-
-#     return smoothed_data'
-# end
-
-# Extract solution
-time = solution.t
-u = hcat(solution.u...)
-time1=solution.t
-u_noisy = u .+ u .* (0.3 .* randn(size(u)))
-
-plot(time,u[1,:])
-plot!(time,u[2,:])
-scatter!(time1,u_noisy[1,:])
-scatter!(time1,u_noisy[2,:])
-
-# window_size = 5
-# smoothed_datasets = [moving_average_smoothing(u1[i, :], window_size)
-#                      for i in 1:length(solution.u[1])]
-# u2 = vcat(smoothed_datasets[1], smoothed_datasets[2])
-# Randomly select some points from the solution
-num_points = 150  # Number of points to select
-selected_indices = rand(1:size(u_noisy, 2), num_points)
-upoints = [u_noisy[:, i] for i in selected_indices]
-timepoints = [time[i] for i in selected_indices]
-temp=hcat(upoints...)
-dataset = [hcat(temp[i, :], timepoints) for i in 1:2]
-
-# plot(time,u[1,:])
-# plot!(time,u[2,:])
-
-discretization = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
-    dataset = [dataset, nothing])
-
-@named pde_system = PDESystem(eqs,
-    bcs,
-    domains,
-    [t],
-    [x(t), y(t)],
-    [α, β, γ, δ],
-    defaults = Dict([α =>2, β => 3, γ =>3, δ =>2]))
-
-# creating dictionary for masking equations
-eqs = pde_system.eqs
-Dict_differentials = Dict()
-exps = toexpr.(eqs)
-nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-sol3 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], progress = true)
-
-# time
-# dataset
-# chainl[1](time', sol3.estimated_nn_params[1], st)[1][1,:]
-# plot!(time1, chainl[1](time1', sol3.estimated_nn_params[1], st)[1][1,:])
-# plot!(time1, chainl[2](time1', sol3.estimated_nn_params[2], st)[1][1,:])
-# plot!(time1, chainl[1](time1', sol5.estimated_nn_params[1], st)[1][1,:])
-# plot!(time1, chainl[2](time1', sol5.estimated_nn_params[2], st)[1][1,:])
-# time1 = collect(0.0:(1 / 100.0):8.0)
-
-sol4 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], progress = true
-)
-
-
-sol5_00 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.15, 0.15],
-    phystd = [0.15, 0.15], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol5_0 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.05, 0.05],
-    phystd = [0.05, 0.05], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.1, 0.1],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol5 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.2, 0.2],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-# 100 points(sol5_2 vs sol3)
-sol5_2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.1, 0.1],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-# 100 points(sol5_2 vs sol3)
-sol5_2_1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.08, 0.08],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-# 100 points(sol5_2 vs sol3)
-sol5_2_2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.2, 0.2],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-# 50 datapoint 0-5 sol5 vs sol4
-# julia> sol4.estimated_de_params
-# 4-element Vector{Particles{Float64, 234}}:
-#  0.549 ± 0.0058
-#  0.71 ± 0.0042
-#  0.408 ± 0.0063
-#  0.355 ± 0.0015
-
-# julia> sol5.estimated_de_params
-# 4-element Vector{Particles{Float64, 234}}:
-#  0.604 ± 0.0052
-#  0.702 ± 0.0034
-#  0.346 ± 0.0037
-#  0.335 ± 0.0013
-
-# 100 datapoint 0-5 sol5_2 vs sol3
-# julia> sol3.estimated_de_params
-# 4-element Vector{Particles{Float64, 234}}:
-#  0.598 ± 0.0037
-#  0.711 ± 0.0027
-#  0.399 ± 0.0032
-#  0.333 ± 0.0011
-
-# julia> sol5_2.estimated_de_params
-# 4-element Vector{Particles{Float64, 234}}:
-#  0.604 ± 0.0035
-#  0.686 ± 0.0026
-#  0.395 ± 0.0029
-#  0.328 ± 0.00095
-
-# timespan for full dataset (0-8)
-sol6 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1, 2),
-        Normal(1, 1),
-        Normal(1, 2),
-        Normal(1, 1)
-    ], progress = true)
-
-sol5_3 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1, 2),
-        Normal(1, 1),
-        Normal(1, 2),
-        Normal(1, 1)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol5_4 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.2, 0.2],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1, 2),
-        Normal(1, 1),
-        Normal(1, 2),
-        Normal(1, 1)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol5_5 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1, 2),
-        Normal(1, 1),
-        Normal(1, 2),
-        Normal(1, 1)
-    ], progress = true
-)
-
-sol7 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1, 2),
-        Normal(1, 1),
-        Normal(1, 2),
-        Normal(1, 1)
-    ], Dict_differentials = Dict_differentials, progress = true)
-
-sol5_5_1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], progress = true
-)
-
-sol7_1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true)
-    
-sol7_2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.1, 0.1],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true)
-
-sol7_3 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.2, 0.2],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true)
- 
-sol7_4 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true)
-
-lpfun = function f(chain::Chains) # function to compute the logpdf values
-    niter, nparams, nchains = size(chain)
-    lp = zeros(niter + nchains) # resulting logpdf values
-    for i = 1:nparams
-        lp += logpdf(MvNormal(Array(chain[:,i,:])) , dataset[1][:,1]')
-        lp += logpdf(MvNormal(Array(chain[:,i,:])) , dataset[1][:,2]')
-    end
-    return lp
-end
-
-DIC, pD = dic(sol3.original.mcmc_chain, lpfun)
-DIC1, pD1 = dic(sol4.original.mcmc_chain, lpfun)
-
-size(sol3.original.mcmc_chain)
-Array(sol3.original.mcmc_chain[1,:,:])
-length(sol3.estimated_nn_params[1])
-chainl[1](time', sol3.estimated_nn_params[1], st)[1]
-
-data = [hcat(calculate_derivatives2(dataset[i][:, 2], dataset[1][:, 1]),dataset[i][:, 2]) for i in eachindex(dataset)]
-dataset[1][:,1]
-dataset[2]
-plot!(dataset[1][:,2],dataset[1][:,1])
-eqs
-sol5 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 200,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.02, 0.02],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(3, 2),
-        Normal(3, 2)
-        # LogNormal(1, 2),
-        # LogNormal(1, 2),
-        # LogNormal(1, 2),
-        # LogNormal(1, 2)
-    ], progress = true)
-
-# plot(time, chainl[1](time', sol2.estimated_nn_params[1], st)[1])
-# plot!(time, chainl[2](time', sol2.estimated_nn_params[2], st)[1])
-
-sol6 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 200,
-    bcstd = [0.5, 0.5],
-    phystd = [0.5, 0.5], l2std = [0.02, 0.02],
-    priorsNNw = (0.0, 5.0), phystdnew = [0.5, 0.5],
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),aa
-    param = [
-        # LogNormal(2, 3),LogNormal(2, 3),LogNormal(2, 3),LogNormal(2, 3)
-        # Normal(3, 2),
-        # Normal(4, 2),
-        Normal(3, 2),
-        Normal(3, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-function calculate_derivatives2(indvar,depvar)
-    x̂, time = indvar,depvar
-    num_points = length(x̂)
-    # Initialize an array to store the derivative values.
-    derivatives = similar(x̂)
-
-    for i in 2:(num_points - 1)
-        # Calculate the first-order derivative using central differences.
-        Δt_forward = time[i + 1] - time[i]
-        Δt_backward = time[i] - time[i - 1]
-
-        derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
-
-        derivatives[i] = derivative
-    end
-
-    # Derivatives at the endpoints can be calculated using forward or backward differences.
-    derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
-    derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
-    return derivatives
-end
-dataset[1]
-dataset[2]
-dataset[1][:,1]=calculate_derivatives2(dataset[1][:,2], dataset[1][:,1])
-dataset[2][:,1]=calculate_derivatives2(dataset[2][:,2], dataset[2][:,1])
-dataset[1]
-dataset[2]
-sol7 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 200,
-    bcstd = [0.5, 0.5],
-    phystd = [0.5, 0.5], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(0, 2),
-        Normal(0, 2)
-        # LogNormal(1, 2),
-        # LogNormal(1, 2),
-        # LogNormal(1, 2),
-        # LogNormal(1, 2)
-    ], progress = true)
-
-# plot(time, chainl[1](time', sol2.estimated_nn_params[1], st)[1])
-# plot!(time, chainl[2](time', sol2.estimated_nn_params[2], st)[1])
-
-sol8 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), phystdnew = [0.1, 0.1],
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),aa
-    param = [
-        # LogNormal(2, 3),LogNormal(2, 3),LogNormal(2, 3),LogNormal(2, 3)
-        # Normal(3, 2),
-        # Normal(4, 2),
-        Normal(0, 2),
-        Normal(0, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-timepoints = collect(0.0:(1 / 100.0):9.0)
-plot!(timepoints', chainl[1](timepoints', sol5_4.estimated_nn_params[1], st)[1])
-plot!(timepoints, chainl[2](timepoints', sol5_4.estimated_nn_params[2], st)[1])
-
-using Plots, StatsPlots
-plotly()
-
-plot(time, u[1, :])
-plot!(time, u[2, :])
-scatter!(time, u_noisy[1, :])
-scatter!(time, u_noisy[2, :])
-scatter!(discretization.dataset[1][1][:,2], discretization.dataset[1][1][:,1])
-scatter!(discretization.dataset[1][2][:,2], discretization.dataset[1][2][:,1])
-
-# plot28(sol4 seems better vs sol3 plots, params seems similar)
-plot!(sol3.timepoints[1]', sol3.ensemblesol[1],legend=nothing)
-plot!(sol3.timepoints[2]', sol3.ensemblesol[2])
-plot!(sol4.timepoints[1]', sol4.ensemblesol[1])
-plot!(sol4.timepoints[2]', sol4.ensemblesol[2])
-
-plot!(sol4_2.timepoints[1]', sol4_2.ensemblesol[1],legend=nothing)
-plot!(sol4_2.timepoints[2]', sol4_2.ensemblesol[2])
-plot!(sol5_2.timepoints[1]', sol5_2.ensemblesol[1],legend=nothing)
-plot!(sol5_2.timepoints[2]', sol5_2.ensemblesol[2])
-
-plot!(sol4_3.timepoints[1]', sol4_3.ensemblesol[1],legend=nothing)
-plot!(sol4_3.timepoints[2]', sol4_3.ensemblesol[2])
-plot!(sol5_3.timepoints[1]', sol5_3.ensemblesol[1])
-plot!(sol5_3.timepoints[2]', sol5_3.ensemblesol[2])
-plot!(sol5_4.timepoints[1]', sol5_4.ensemblesol[1],legend=nothing)
-plot!(sol5_4.timepoints[2]', sol5_4.ensemblesol[2])
-
-
-# plot 36 sol4 vs sol5(params sol4 better, but plots sol5 "looks" better),plot 44(sol5 better than sol6 overall)
-plot!(sol5.timepoints[1]', sol5.ensemblesol[1],legend=nothing)
-plot!(sol5.timepoints[2]', sol5.ensemblesol[2])
-plot!(sol6.timepoints[1]', sol6.ensemblesol[1])
-plot!(sol6.timepoints[2]', sol6.ensemblesol[2])
-
-# plot52 sol7 vs sol5(sol5 overall better plots, params?)
-plot!(sol7.timepoints[1]', sol7.ensemblesol[1])
-plot!(sol7.timepoints[2]', sol7.ensemblesol[2])
-
-# sol8,sol8_2,sol9,sol9_2 bad
-plot!(sol8.timepoints[1]', sol8.ensemblesol[1])
-plot!(sol8.timepoints[2]', sol8.ensemblesol[2])
-plot!(sol8_2.timepoints[1]', sol8_2.ensemblesol[1])
-plot!(sol8_2.timepoints[2]', sol8_2.ensemblesol[2])
-
-plot!(sol9.timepoints[1]', sol9.ensemblesol[1])
-plot!(sol9.timepoints[2]', sol9.ensemblesol[2])
-plot!(sol9_2.timepoints[1]', sol9_2.ensemblesol[1])
-plot!(sol9_2.timepoints[2]', sol9_2.ensemblesol[2])
-
-
-plot!(sol5_5.timepoints[1]', sol5_5.ensemblesol[1])
-plot!(sol5_5.timepoints[2]', sol5_5.ensemblesol[2],legend=nothing)
-
-plot!(sol5_5_1.timepoints[1]', sol5_5_1.ensemblesol[1])
-plot!(sol5_5_1.timepoints[2]', sol5_5_1.ensemblesol[2],legend=nothing)
-plot!(sol7_1.timepoints[1]', sol7_1.ensemblesol[1])
-plot!(sol7_1.timepoints[2]', sol7_1.ensemblesol[2])
-
-plot!(sol7_4.timepoints[1]', sol7_4.ensemblesol[1])
-plot!(sol7_4.timepoints[2]', sol7_4.ensemblesol[2])
-
-plot!(sol5_2_1.timepoints[1]', sol5_2_1.ensemblesol[1],legend=nothing)
-plot!(sol5_2_1.timepoints[2]', sol5_2_1.ensemblesol[2])
-plot!(sol5_2_2.timepoints[1]', sol5_2_2.ensemblesol[1],legend=nothing)
-plot!(sol5_2_2.timepoints[2]', sol5_2_2.ensemblesol[2])
-
-plot!(sol5_0.timepoints[1]', sol5_0.ensemblesol[1])
-plot!(sol5_0.timepoints[2]', sol5_0.ensemblesol[2],legend=nothing)
-
-plot!(sol5_00.timepoints[1]', sol5_00.ensemblesol[1])
-plot!(sol5_00.timepoints[2]', sol5_00.ensemblesol[2],legend=nothing)
-
-# test with lower number of points
-# test same calls 2 times or more
-# consider full range dataset case
-# combination of all above
-
-# run 1 100 iters
-sol5.estimated_de_params
-sol6.estimated_de_params
-
-# run 2 200 iters
-sol5.estimated_de_params
-sol6.estimated_de_params
-
-# run 2 200 iters
-sol3.estimated_de_params
-sol4.estimated_de_params
-
-# p = [2/3, 2/3, 1/3, 1/3]
-sol3.estimated_de_params
-sol4.estimated_de_params
-dataset[1]
-eqs
-α, β, γ, δ = p
-p
-#  1.0
-#  0.6666666666666666
-#  1.0
-#  0.33333333333333333
-
-1/a
-1/c
-eqs
-using StatsPlots
-plotly()
-plot(sol3.original.mcmc_chain)
-plot(sol4.original.mcmc_chain)
-
-# 4-element Vector{Particles{Float64, 34}}:
-#  1.23 ± 0.022
-#  0.858 ± 0.011
-#  3.04 ± 0.079
-#  1.03 ± 0.024
-# 4-element Vector{Particles{Float64, 34}}:
-#  1.2 ± 0.0069
-#  0.835 ± 0.006
-#  3.22 ± 0.01
-#  1.08 ± 0.0053
-# # plot(time', chainl[1](time', sol1.estimated_nn_params[1], st)[1])
-# # plot!(time, chainl[2](time', sol1.estimated_nn_params[2], st)[1])
-
-# sol3 = ahmc_bayesian_pinn_pde(pde_system,
-#     discretization;
-#     draw_samples = 500,
-#     bcstd = [0.05, 0.05],
-#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
-#     phystdnew = [0.5, 0.5],
-#     #  Kernel = AdvancedHMC.NUTS(0.8),
-#     priorsNNw = (0.0, 10.0),
-#     saveats = [1 / 50.0],
-#     param = [
-#         Normal(0.0, 2),
-#         Normal(0.0, 2),
-#         Normal(0.0, 2),
-#         Normal(0.0, 2)
-#     ],
-#     Dict_differentials = Dict_differentials, progress = true)
-
-# sol = ahmc_bayesian_pinn_pde(pde_system,
-#     discretization;
-#     draw_samples = 500,
-#     bcstd = [0.05, 0.05],
-#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
-#     priorsNNw = (0.0, 10.0),
-#     saveats = [1 / 50.0],
-#     # Kernel = AdvancedHMC.NUTS(0.8),
-#     param = [
-#         Normal(1.0, 2),
-#         Normal(1.0, 2),
-#         Normal(1.0, 2),
-#         Normal(1.0, 2)
-#     ], progress = true)
-
-# plot!(sol.timepoints[1]', sol.ensemblesol[1])
-# plot!(sol.timepoints[2]', sol.ensemblesol[2])
-
-# sol1 = ahmc_bayesian_pinn_pde(pde_system,
-#     discretization;
-#     draw_samples = 500,
-#     bcstd = [0.05, 0.05],
-#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
-#     phystdnew = [0.5, 0.5],
-#     #  Kernel = AdvancedHMC.NUTS(0.8),
-#     priorsNNw = (0.0, 10.0),
-#     saveats = [1 / 50.0],
-#     param = [
-#         Normal(1.0, 2),
-#         Normal(1.0, 2),
-#         Normal(1.0, 2),
-#         Normal(1.0, 2)
-#     ],
-#     Dict_differentials = Dict_differentials, progress = true)
-
-# plot!(sol1.timepoints[1]', sol1.ensemblesol[1])
-# plot!(sol1.timepoints[2]', sol1.ensemblesol[2])
-
-sol = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 500,
-    bcstd = [0.05, 0.05],
-    phystd = [0.005, 0.005], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 10.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(1.0, 2),
-        Normal(1.0, 2),
-        Normal(1.0, 2),
-        Normal(1.0, 2)
-    ])
-
-# plot!(sol.timepoints[1]', sol.ensemblesol[1])
-# plot!(sol.timepoints[2]', sol.ensemblesol[2])
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 500,
-    bcstd = [0.05, 0.05],
-    phystd = [0.005, 0.005], l2std = [0.1, 0.1],
-    phystdnew = [0.5, 0.5],
-    #  Kernel = AdvancedHMC.NUTS(0.8),
-    priorsNNw = (0.0, 10.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1.0, 2),
-        Normal(1.0, 2),
-        Normal(1.0, 2),
-        Normal(1.0, 2)
-    ],
-    Dict_differentials = Dict_differentials)
-
-param = 2 * π
-ts = vec(sol1.timepoints[1])
-u_real = [analytic_sol_func1(0.0, t) for t in ts]
-u_predict = pmean(sol1.ensemblesol[1])
-
-@test u_predict≈u_real atol=1.5
-@test mean(u_predict .- u_real) < 0.1
-@test sol1.estimated_de_params[1]≈param atol=param * 0.3
-
-# points1 = []
-# for eq_arg in eq_args
-#     a = []
-#     # for each (depvar,[indvar1..]) if indvari==indvar (eq_arg)
-#     for i in eachindex(symbols_input)
-#         if symbols_input[i][2] == eq_arg
-#             # include domain points of that depvar
-#             # each loss equation take domain matrix [points..;points..]
-#             push!(a, train_sets[i][:, 2:end]')
-#         end
-#     end
-#     # vcat as new row for next equation
-#     push!(points1, vcat(a...))
-# end
-# println(points1 == points)
-
-# using NeuralPDE, Flux, Lux, ModelingToolkit, LinearAlgebra, AdvancedHMC
-# import ModelingToolkit: Interval, infimum, supremum, Distributions
-# using Plots, MonteCarloMeasurements
-
-# @parameters x, t, α
-# @variables u(..)
-# Dt = Differential(t)
-# Dx = Differential(x)
-# Dx2 = Differential(x)^2
-# Dx3 = Differential(x)^3
-# Dx4 = Differential(x)^4
-
-# # α = 1
-# β = 4
-# γ = 1
-# eq = Dt(u(x, t)) + u(x, t) * Dx(u(x, t)) + α * Dx2(u(x, t)) + β * Dx3(u(x, t)) + γ * Dx4(u(x, t)) ~ 0
-
-# u_analytic(x, t; z = -x / 2 + t) = 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
-# du(x, t; z = -x / 2 + t) = 15 / 2 * (tanh(z) + 1) * (3 * tanh(z) - 1) * sech(z)^2
-
-# bcs = [u(x, 0) ~ u_analytic(x, 0),
-#     u(-10, t) ~ u_analytic(-10, t),
-#     u(10, t) ~ u_analytic(10, t),
-#     Dx(u(-10, t)) ~ du(-10, t),
-#     Dx(u(10, t)) ~ du(10, t)]
-
-# # Space and time domains
-# domains = [x ∈ Interval(-10.0, 10.0),
-#     t ∈ Interval(0.0, 1.0)]
-
-# # Discretization
-# dx = 0.4;
-# dt = 0.2;
-
-# # Function to compute analytical solution at a specific point (x, t)
-# function u_analytic_point(x, t)
-#     z = -x / 2 + t
-#     return 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
-# end
-
-# # Function to generate the dataset matrix
-# function generate_dataset_matrix(domains, dx, dt)
-#     x_values = -10:dx:10
-#     t_values = 0.0:dt:1.0
-
-#     dataset = []
-
-#     for t in t_values
-#         for x in x_values
-#             u_value = u_analytic_point(x, t)
-#             push!(dataset, [u_value, x, t])
-#         end
-#     end
-
-#     return vcat([data' for data in dataset]...)
-# end
-
-# datasetpde = [generate_dataset_matrix(domains, dx, dt)]
-
-# # noise to dataset
-# noisydataset = deepcopy(datasetpde)
-# noisydataset[1][:, 1] = noisydataset[1][:, 1] .+
-#                         randn(size(noisydataset[1][:, 1])) .* 5 / 100 .*
-#                         noisydataset[1][:, 1]
-
-# # plot(datasetpde[1][:, 2], datasetpde[1][:, 1], title = "Dataset from Analytical Solution")
-# # plot!(noisydataset[1][:, 2], noisydataset[1][:, 1])
-
-# # Neural network
-# chain = Lux.Chain(Lux.Dense(2, 8, Lux.tanh),
-#     Lux.Dense(8, 8, Lux.tanh),
-#     Lux.Dense(8, 1))
-
-# discretization = NeuralPDE.BayesianPINN([chain],
-#     GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset, nothing])
-
-# @named pde_system = PDESystem(eq,
-#     bcs,
-#     domains,
-#     [x, t],
-#     [u(x, t)],
-#     [α],
-#     defaults = Dict([α => 0.5]))
-
-# sol1 = ahmc_bayesian_pinn_pde(pde_system,
-#     discretization;
-#     draw_samples = 100,
-#     bcstd = [0.2, 0.2, 0.2, 0.2, 0.2],
-#     phystd = [1.0], l2std = [0.05], param = [Distributions.LogNormal(0.5, 2)],
-#     priorsNNw = (0.0, 10.0),
-#     saveats = [1 / 100.0, 1 / 100.0], progress = true)
-
-# eqs = pde_system.eqs
-# Dict_differentials = Dict()
-# exps = toexpr.(eqs)
-# nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-# sol2 = ahmc_bayesian_pinn_pde(pde_system,
-#     discretization;
-#     draw_samples = 100,
-#     bcstd = [0.2, 0.2, 0.2, 0.2, 0.2],
-#     phystd = [1.0], phystdnew = [0.05], l2std = [0.05],
-#     param = [Distributions.LogNormal(0.5, 2)],
-#     priorsNNw = (0.0, 10.0),
-#     saveats = [1 / 100.0, 1 / 100.0], Dict_differentials = Dict_differentials,
-#     progress = true)
-
-# phi = discretization.phi[1]
-# xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-#           for (d, dx) in zip(domains, [dx / 10, dt])]
-# u_predict = [[first(pmean(phi([x, t], sol1.estimated_nn_params[1]))) for x in xs]
-#              for t in ts]
-# u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-# diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1.estimated_nn_params[1]))))
-#            for x in xs]
-#           for t in ts]
-
-# # p1 = plot(xs, u_predict, title = "predict")
-# # p2 = plot(xs, u_real, title = "analytic")
-# # p3 = plot(xs, diff_u, title = "error")
-# # plot(p1, p2, p3)
-
-# phi = discretization.phi[1]
-# xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-#           for (d, dx) in zip(domains, [dx / 10, dt])]
-# u_predict = [[first(pmean(phi([x, t], sol2.estimated_nn_params[1]))) for x in xs]
-#              for t in ts]
-# u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-# diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol2.estimated_nn_params[1]))))
-#            for x in xs]
-#           for t in ts]
-
-# # p1 = plot(xs, u_predict, title = "predict")
-# # p2 = plot(xs, u_real, title = "analytic")
-# # p3 = plot(xs, diff_u, title = "error")
-# # plot(p1, p2, p3)
-
-@parameters t, p
-@variables u(..)
-
-Dt = Differential(t)
-eqs = Dt(u(t)) - cos(p * t) ~ 0
-bcs = [u(0) ~ 0.0]
-domains = [t ∈ Interval(0.0, 2.0)]
-
-chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
-initl, st = Lux.setup(Random.default_rng(), chainl)
-
-@named pde_system = PDESystem(eqs,
-    bcs,
-    domains,
-    [t],
-    [u(t)],
-    [p],
-    defaults = Dict([p => 4.0]))
-
-analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
-timepoints = collect(0.0:(1 / 100.0):2.0)
-u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
-u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
-dataset = [hcat(u1, timepoints)]
-
-discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
-    dataset = [dataset, nothing])
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 1500,
-    bcstd = [0.05],
-    phystd = [0.01], l2std = [0.01],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 50.0],
-    param = [LogNormal(4.0, 2)], progress = true)
-
-param = 2 * π
-ts = vec(sol1.timepoints[1])
-u_real = [analytic_sol_func1(0.0, t) for t in ts]
-u_predict = pmean(sol1.ensemblesol[1])
-
-@test u_predict≈u_real atol=0.1
-@test mean(u_predict .- u_real) < 0.01
-@test sol1.estimated_de_params[1]≈param atol=0.1
-sol1.estimated_de_params[1]
-
-eqs = pde_system.eqs
-Dict_differentials = Dict()
-exps = toexpr.(eqs)
-nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-sol2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 1500,
-    bcstd = [0.05],
-    phystd = [0.01], l2std = [0.02], phystdnew = [0.02],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 50.0],
-    param = [LogNormal(4.0, 2)],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-param = 2 * π
-ts_2 = vec(sol2.timepoints[1])
-u_real_2 = [analytic_sol_func1(0.0, t) for t in ts]
-u_predict_2 = pmean(sol2.ensemblesol[1])
-
-@test u_predict_2≈u_real_2 atol=0.1
-@test mean(u_predict_2 .- u_real_2) < 0.01
-@test sol2.estimated_de_params[1]≈param atol=0.1
-sol2.estimated_de_params[1]
-
-plot(ts_2, u_predict_2)
-plot!(ts_2, u_real_2)
-
-@parameters t, σ_
-@variables x(..), y(..), z(..)
-Dt = Differential(t)
-eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
-    Dt(y(t)) ~ x(t) * (28.0 - z(t)) - y(t),
-    Dt(z(t)) ~ x(t) * y(t) - 8 / 3 * z(t)]
-
-bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
-domains = [t ∈ Interval(0.0, 1.0)]
-
-input_ = length(domains)
-n = 7
-chain = [
-    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-        Lux.Dense(n, 1)),
-    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-        Lux.Dense(n, 1)),
-    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-        Lux.Dense(n, 1))
-]
-
-#Generate Data
-function lorenz!(du, u, p, t)
-    du[1] = 10.0 * (u[2] - u[1])
-    du[2] = u[1] * (28.0 - u[3]) - u[2]
-    du[3] = u[1] * u[2] - (8 / 3) * u[3]
-end
-
-u0 = [1.0; 0.0; 0.0]
-tspan = (0.0, 1.0)
-prob = ODEProblem(lorenz!, u0, tspan)
-sol = solve(prob, Tsit5(), dt = 0.01, saveat = 0.05)
-ts = sol.t
-us = hcat(sol.u...)
-us = us .+ ((0.05 .* randn(size(us))) .* us)
-ts_ = hcat(sol(ts).t...)[1, :]
-dataset = [hcat(us[i, :], ts_) for i in 1:3]
-
-discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true,
-    dataset = [dataset, nothing])
-
-@named pde_system = PDESystem(eqs, bcs, domains,
-    [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]]))
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 100,
-    bcstd = [0.3, 0.3, 0.3],
-    phystd = [0.1, 0.1, 0.1],
-    l2std = [1, 1, 1],
-    priorsNNw = (0.0, 1.0),
-    saveats = [0.01],
-    param = [Normal(14.0, 2)], progress = true)
-
-idealp = 10.0
-p_ = sol1.estimated_de_params[1]
-@test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
-# @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
-
-@parameters x y
-@variables u(..)
-Dxx = Differential(x)^2
-Dyy = Differential(y)^2
-
-# 2D PDE
-eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y)
-
-# Boundary conditions
-bcs = [u(0, y) ~ 0.0, u(1, y) ~ 0.0,
-    u(x, 0) ~ 0.0, u(x, 1) ~ 0.0]
-
-# Space and time domains
-domains = [x ∈ Interval(0.0, 1.0),
-    y ∈ Interval(0.0, 1.0)]
-
-# Neural network
-dim = 2 # number of dimensions
-chain = Lux.Chain(Lux.Dense(dim, 9, Lux.σ), Lux.Dense(9, 9, Lux.σ), Lux.Dense(9, 1))
-
-# Discretization
-dx = 0.04
-discretization = BayesianPINN([chain], GridTraining(dx), dataset = [[dataset], nothing])
-
-@named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
-
-eqs = pde_system.eqs
-Dict_differentials = Dict()
-exps = toexpr.(eqs)
-nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 5,
-    bcstd = [0.01, 0.01, 0.01, 0.01],
-    phystd = [0.005],
-    priorsNNw = (0.0, 2.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-xs = sol1.timepoints[1]
-sol1.ensemblesol[1]
-analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2)
-
-dataset = hcat(u_real, xs')
-u_predict = pmean(sol1.ensemblesol[1])
-u_real = [analytic_sol_func(xs[:, i][1], xs[:, i][2]) for i in 1:length(xs[1, :])]
-@test u_predict≈u_real atol=0.8
\ No newline at end of file
+@test sol1.estimated_de_params[1]≈param atol=param * 0.3
\ No newline at end of file

From efbccda4297d4c89d1515ea943aee4df0b665bb7 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Fri, 29 Mar 2024 23:47:36 +0530
Subject: [PATCH 040/107] update training_strategies.jl

---
 src/training_strategies.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/training_strategies.jl b/src/training_strategies.jl
index e33f490fe7..d128d7bb5f 100644
--- a/src/training_strategies.jl
+++ b/src/training_strategies.jl
@@ -48,7 +48,7 @@ end
 
 # include dataset points in pde_residual loglikelihood (BayesianPINN)
 function merge_strategy_with_loglikelihood_function(pinnrep::PINNRepresentation,
-        strategy::GridTraining,
+        strategy,
         datafree_pde_loss_function,
         datafree_bc_loss_function; train_sets_pde = nothing, train_sets_bc = nothing)
     @unpack domains, eqs, bcs, dict_indvars, dict_depvars, flat_init_params = pinnrep

From 39ed5f6d58b4cdba3807a773d5b73b1d7e4d605d Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Sat, 30 Mar 2024 00:06:01 +0530
Subject: [PATCH 041/107] update BPINN_PDEinvsol_tests.jl

---
 src/training_strategies.jl    |  2 +-
 test/BPINN_PDEinvsol_tests.jl | 73 ++++++++++++++++++-----------------
 2 files changed, 38 insertions(+), 37 deletions(-)

diff --git a/src/training_strategies.jl b/src/training_strategies.jl
index d128d7bb5f..e33f490fe7 100644
--- a/src/training_strategies.jl
+++ b/src/training_strategies.jl
@@ -48,7 +48,7 @@ end
 
 # include dataset points in pde_residual loglikelihood (BayesianPINN)
 function merge_strategy_with_loglikelihood_function(pinnrep::PINNRepresentation,
-        strategy,
+        strategy::GridTraining,
         datafree_pde_loss_function,
         datafree_bc_loss_function; train_sets_pde = nothing, train_sets_bc = nothing)
     @unpack domains, eqs, bcs, dict_indvars, dict_depvars, flat_init_params = pinnrep
diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index 748ff21686..5cc8fe95fe 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -34,42 +34,43 @@ Random.seed!(100)
     u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
     dataset = [hcat(u1, timepoints)]
 
-    # checking all training strategies
-    discretization = BayesianPINN([chainl], StochasticTraining(200), param_estim = true,
-        dataset = [dataset, nothing])
-
-    ahmc_bayesian_pinn_pde(pde_system,
-        discretization;
-        draw_samples = 1500,
-        bcstd = [0.05],
-        phystd = [0.01], l2std = [0.01],
-        priorsNNw = (0.0, 1.0),
-        saveats = [1 / 50.0],
-        param = [LogNormal(6.0, 0.5)])
-
-    discretization = BayesianPINN([chainl], QuasiRandomTraining(200), param_estim = true,
-        dataset = [dataset, nothing])
-
-    ahmc_bayesian_pinn_pde(pde_system,
-        discretization;
-        draw_samples = 1500,
-        bcstd = [0.05],
-        phystd = [0.01], l2std = [0.01],
-        priorsNNw = (0.0, 1.0),
-        saveats = [1 / 50.0],
-        param = [LogNormal(6.0, 0.5)])
-
-    discretization = BayesianPINN([chainl], QuadratureTraining(), param_estim = true,
-        dataset = [dataset, nothing])
-
-    ahmc_bayesian_pinn_pde(pde_system,
-        discretization;
-        draw_samples = 1500,
-        bcstd = [0.05],
-        phystd = [0.01], l2std = [0.01],
-        priorsNNw = (0.0, 1.0),
-        saveats = [1 / 50.0],
-        param = [LogNormal(6.0, 0.5)])
+    # TODO: correct implementations
+    # # checking all training strategies
+    # discretization = BayesianPINN([chainl], StochasticTraining(200), param_estim = true,
+    #     dataset = [dataset, nothing])
+
+    # ahmc_bayesian_pinn_pde(pde_system,
+    #     discretization;
+    #     draw_samples = 1500,
+    #     bcstd = [0.05],
+    #     phystd = [0.01], l2std = [0.01],
+    #     priorsNNw = (0.0, 1.0),
+    #     saveats = [1 / 50.0],
+    #     param = [LogNormal(6.0, 0.5)])
+
+    # discretization = BayesianPINN([chainl], QuasiRandomTraining(200), param_estim = true,
+    #     dataset = [dataset, nothing])
+
+    # ahmc_bayesian_pinn_pde(pde_system,
+    #     discretization;
+    #     draw_samples = 1500,
+    #     bcstd = [0.05],
+    #     phystd = [0.01], l2std = [0.01],
+    #     priorsNNw = (0.0, 1.0),
+    #     saveats = [1 / 50.0],
+    #     param = [LogNormal(6.0, 0.5)])
+
+    # discretization = BayesianPINN([chainl], QuadratureTraining(), param_estim = true,
+    #     dataset = [dataset, nothing])
+
+    # ahmc_bayesian_pinn_pde(pde_system,
+    #     discretization;
+    #     draw_samples = 1500,
+    #     bcstd = [0.05],
+    #     phystd = [0.01], l2std = [0.01],
+    #     priorsNNw = (0.0, 1.0),
+    #     saveats = [1 / 50.0],
+    #     param = [LogNormal(6.0, 0.5)])
 
     discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
         dataset = [dataset, nothing])

From 27310635d9510c99e14ca61b6a278e2640c7a65b Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Sun, 5 May 2024 01:50:15 +0530
Subject: [PATCH 042/107] changes from reviews

---
 src/collocated_estim.jl | 123 +---------------------------------------
 1 file changed, 2 insertions(+), 121 deletions(-)

diff --git a/src/collocated_estim.jl b/src/collocated_estim.jl
index 3902f74a27..0fe608e951 100644
--- a/src/collocated_estim.jl
+++ b/src/collocated_estim.jl
@@ -1,56 +1,14 @@
-# suggested extra loss function
+# suggested extra loss function for ODE solver case
 function L2loss2(Tar::LogTargetDensity, θ)
     f = Tar.prob.f
 
     # parameter estimation chosen or not
     if Tar.extraparams > 0
-        # deri_sol = deri_sol'
         autodiff = Tar.autodiff
-        # # Timepoints to enforce Physics
-        # dataset = Array(reduce(hcat, dataset)')
-        # t = dataset[end, :]
-        # û = dataset[1:(end - 1), :]
-
-        # ode_params = Tar.extraparams == 1 ?
-        #              θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
-        #              θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
-
-        # if length(û[:, 1]) == 1
-        #     physsol = [f(û[:, i][1],
-        #         ode_params,
-        #         t[i])
-        #                for i in 1:length(û[1, :])]
-        # else
-        #     physsol = [f(û[:, i],
-        #         ode_params,
-        #         t[i])
-        #                for i in 1:length(û[1, :])]
-        # end
-        # #form of NN output matrix output dim x n
-        # deri_physsol = reduce(hcat, physsol)
-
-        # > for perfect deriv(basically gradient matching in case of an ODEFunction)
-        # in case of PDE or general ODE we would want to reduce residue of f(du,u,p,t)
-        # if length(û[:, 1]) == 1
-        #     deri_sol = [f(û[:, i][1],
-        #         Tar.prob.p,
-        #         t[i])
-        #                 for i in 1:length(û[1, :])]
-        # else
-        #     deri_sol = [f(û[:, i],
-        #         Tar.prob.p,
-        #         t[i])
-        #                 for i in 1:length(û[1, :])]
-        # end
-        # deri_sol = reduce(hcat, deri_sol) 
-        # deri_sol = reduce(hcat, derivatives)
-
         # Timepoints to enforce Physics 
         t = Tar.dataset[end]
         u1 = Tar.dataset[2]
         û = Tar.dataset[1]
-        # Tar(t, θ[1:(length(θ) - Tar.extraparams)])'
-        #  
 
         nnsol = NNodederi(Tar, t, θ[1:(length(θ) - Tar.extraparams)], autodiff)
 
@@ -71,24 +29,7 @@ function L2loss2(Tar::LogTargetDensity, θ)
         end
         #form of NN output matrix output dim x n 
         deri_physsol = reduce(hcat, physsol)
-
-        # if length(Tar.prob.u0) == 1
-        #     nnsol = [f(û[i],
-        #         Tar.prob.p,
-        #         t[i])
-        #              for i in 1:length(û[:, 1])]
-        # else
-        #     nnsol = [f([û[i], u1[i]],
-        #         Tar.prob.p,
-        #         t[i])
-        #              for i in 1:length(û[:, 1])]
-        # end
-        # form of NN output matrix output dim x n
-        # nnsol = reduce(hcat, nnsol)
-
-        # > Instead of dataset gradients trying NN derivatives with dataset collocation 
-        # # convert to matrix as nnsol  
-
+   
         physlogprob = 0
         for i in 1:length(Tar.prob.u0)
             # can add phystd[i] for u[i] 
@@ -102,64 +43,4 @@ function L2loss2(Tar::LogTargetDensity, θ)
     else
         return 0
     end
-end
-
-# PDE(DU,U,P,T)=0
-
-# Derivated via Central Diff
-# function calculate_derivatives2(dataset)
-#     x̂, time = dataset
-#     num_points = length(x̂)
-#     # Initialize an array to store the derivative values.
-#     derivatives = similar(x̂)
-
-#     for i in 2:(num_points - 1)
-#         # Calculate the first-order derivative using central differences.
-#         Δt_forward = time[i + 1] - time[i]
-#         Δt_backward = time[i] - time[i - 1]
-
-#         derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
-
-#         derivatives[i] = derivative
-#     end
-
-#     # Derivatives at the endpoints can be calculated using forward or backward differences.
-#     derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
-#     derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
-#     return derivatives
-# end
-
-function calderivatives(prob, dataset)
-    chainflux = Flux.Chain(Flux.Dense(1, 8, tanh), Flux.Dense(8, 8, tanh),
-        Flux.Dense(8, 2)) |> Flux.f64
-    # chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64
-    function loss(x, y)
-        # sum(Flux.mse.(prob.u0[1] .+ (prob.tspan[2] .- x)' .* chainflux(x)[1, :], y[1]) +
-        #     Flux.mse.(prob.u0[2] .+ (prob.tspan[2] .- x)' .* chainflux(x)[2, :], y[2]))
-        # sum(Flux.mse.(prob.u0[1] .+ (prob.tspan[2] .- x)' .* chainflux(x)[1, :], y[1]))
-        sum(Flux.mse.(chainflux(x), y))
-    end
-    optimizer = Flux.Optimise.ADAM(0.01)
-    epochs = 3000
-    for epoch in 1:epochs
-        Flux.train!(loss,
-            Flux.params(chainflux),
-            [(dataset[end]', dataset[1:(end - 1)])],
-            optimizer)
-    end
-
-    # A1 = (prob.u0' .+
-    #   (prob.tspan[2] .- (dataset[end]' .+ sqrt(eps(eltype(Float64)))))' .*
-    #   chainflux(dataset[end]' .+ sqrt(eps(eltype(Float64))))')
-
-    # A2 = (prob.u0' .+
-    #       (prob.tspan[2] .- (dataset[end]'))' .*
-    #       chainflux(dataset[end]')')
-
-    A1 = chainflux(dataset[end]' .+ sqrt(eps(eltype(dataset[end][1]))))
-    A2 = chainflux(dataset[end]')
-
-    gradients = (A2 .- A1) ./ sqrt(eps(eltype(dataset[end][1])))
-
-    return gradients
 end
\ No newline at end of file

From a90c7304f95ebe49665cce2b894c94fbf7097d17 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Thu, 9 May 2024 02:07:58 +0530
Subject: [PATCH 043/107] Testing code for BPINN PDEs

---
 test/BPINN_PDEinvsol_tests.jl | 2397 ++++++++++++++++++++++++++++++++-
 1 file changed, 2396 insertions(+), 1 deletion(-)

diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index 5cc8fe95fe..e756a3861f 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -243,4 +243,2399 @@ u_predict = pmean(sol2.ensemblesol[1])
 
 @test u_predict≈u_real atol=1.5
 @test mean(u_predict .- u_real) < 0.1
-@test sol1.estimated_de_params[1]≈param atol=param * 0.3
\ No newline at end of file
+@test sol1.estimated_de_params[1]≈param atol=param * 0.3
+
+
+println("Example 3: Lotka Volterra with New parameter estimation")
+@parameters t α β γ δ
+@variables x(..) y(..)
+
+Dt = Differential(t)
+eqs = [Dt(x(t)) * α  ~ x(t) - β * x(t) * y(t), Dt(y(t)) * δ ~ x(t) * y(t) - y(t)*γ ]
+bcs = [x(0) ~ 1.0, y(0) ~ 1.0]
+domains = [t ∈ Interval(0.0, 7.0)]
+
+# Define the parameters' values
+# α, β, γ, δ = p
+
+# regular equations
+# dx = (1.5 - y) * x # prey
+# dy = (x - 3.0) * y # predator
+# p = [1.5, 1.0, 3.0, 1.0] non transformed values
+
+# transformed equations
+# dx*0.666 = (1 - 0.666 * y) * x # prey
+# dy*1.0 = (x - 3.0) * y # predator
+# p = [0.666, 0.666, 3.0, 1.0] transformed values (change is scale also ensured!)
+
+chainl = [
+    Lux.Chain(Lux.Dense(1, 5, sin), Lux.Dense(5, 5, sin),Lux.Dense(5, 1)),
+    Lux.Chain(Lux.Dense(1, 5, sin), Lux.Dense(5, 5, sin),Lux.Dense(5, 1))
+]
+
+initl, st = Lux.setup(Random.default_rng(), chainl[1])
+initl1, st1 = Lux.setup(Random.default_rng(), chainl[2])
+
+using NeuralPDE, Lux, OrdinaryDiffEq, Distributions, Random
+
+function lotka_volterra(u, p, t)
+    # Model parameters. 
+    α, β, γ, δ = p
+    # Current state.
+    x, y = u
+
+    # Evaluate differential equations.
+    dx = (α - β * y) * x # prey
+    dy = (δ * x - γ) * y # predator
+
+    return [dx, dy]
+end
+# initial-value problem.
+u0 = [1.0, 1.0]
+# p = [2/3, 2/3, 1/3.0, 1/3.0]
+p = [1.5, 1.0, 3.0, 1.0]
+tspan = (0.0, 7.0)
+prob = ODEProblem(lotka_volterra, u0, tspan, p)
+dt = 0.01
+solution = solve(prob, Tsit5(); saveat = dt)
+solution1 = solve(prob, Tsit5(); saveat = 0.02)
+
+function calculate_errors(approx_sol, solution_points)
+  # Check vector lengths match
+  if length(approx_sol) != length(solution_points)
+    error("Vectors must have the same length")
+  end
+
+  # Calculate errors
+  n = length(approx_sol)
+  errors = randn(n)
+  for i in 1:n
+    errors[i] = solution_points[i] - approx_sol[i]
+  end
+
+  # Calculate RMSE
+  rmse = sqrt(mean(errors.^2))
+
+  # Calculate MAE
+  mae = mean(abs.(errors))
+
+  # Calculate maximum absolute error
+  max_error = maximum(abs.(errors))
+
+  # Return dictionary with errors
+  return Dict(
+      "RMSE" => rmse,
+      "MAE" => mae,
+      "Max Abs Error" => max_error,
+  )
+end
+u = hcat(solution1.u...)
+u[1,:]
+sol6_2.ensemblesol[1]
+
+a1=calculate_errors(pmean(sol6_1.ensemblesol[1]), u1[1,:])
+b1=calculate_errors(pmean(sol6_1.ensemblesol[2]), u1[2,:])
+
+a=calculate_errors(pmean(sol6_2.ensemblesol[1]), u[1,:])
+b=calculate_errors(pmean(sol6_2.ensemblesol[2]), u[2,:])
+
+c=calculate_errors(pmean(sol6_L2_2.ensemblesol[1]), u[1,:])
+d=calculate_errors(pmean(sol6_L2_2.ensemblesol[2]), u[2,:])
+
+e=calculate_errors(pmean(sol6_L2_1.ensemblesol[1]), u[1,:])
+f=calculate_errors(pmean(sol6_L2_1.ensemblesol[2]), u[2,:])
+
+g=calculate_errors(pmean(sol6_L2.ensemblesol[1]), u[1,:])
+h=calculate_errors(pmean(sol6_L2.ensemblesol[2]), u[2,:])
+sol6_2.ensemblesol[1]
+sol6_2.ensemblesol[2]
+
+sol6_L2.ensemblesol[1]
+sol6_L2.ensemblesol[2]
+
+# function moving_average_smoothing(data::Vector{T}, window_size::Int) where {T}
+#     smoothed_data = similar(data, T, length(data))
+
+#     for i in 1:length(data)
+#         start_idx = max(1, i - window_size)
+#         end_idx = min(length(data), i + window_size)
+#         smoothed_data[i] = mean(data[start_idx:end_idx])
+#     end
+
+#     return smoothed_data'
+# end
+
+# Extract solution
+time = solution.t
+u = hcat(solution.u...)
+time1=solution.t
+u_noisy = u .+ u .* (0.2 .* randn(size(u)))
+u_noisy0 = u .+ (3.0 .* rand(size(u)[1],size(u)[2]) .- 1.5)
+u_noisy1 = u .+ (0.8.* randn(size(Array(solution))))
+u_noisy2 = u .+ (0.5.* randn(size(Array(solution))))
+
+plot(time,u[1,:])
+plot!(time,u[2,:])
+scatter!(time1,u_noisy0[1,:])
+scatter!(time1,u_noisy0[2,:])
+scatter!(discretization_08_gaussian.dataset[1][1][:,2], discretization_08_gaussian.dataset[1][1][:,1])
+scatter!(discretization_08_gaussian.dataset[1][2][:,2], discretization_08_gaussian.dataset[1][2][:,1])
+
+scatter!(discretization_05_gaussian.dataset[1][1][:,2], discretization_05_gaussian.dataset[1][1][:,1])
+scatter!(discretization_05_gaussian.dataset[1][2][:,2], discretization_05_gaussian.dataset[1][2][:,1])
+# discretization_05_gaussian.dataset[1][1][:,2]
+# window_size = 5
+# smoothed_datasets = [moving_average_smoothing(u1[i, :], window_size)
+#                      for i in 1:length(solution.u[1])]
+# u2 = vcat(smoothed_datasets[1], smoothed_datasets[2])
+# Randomly select some points from the solution
+num_points = 100  # Number of points to select
+selected_indices = rand(1:size(u_noisy1, 2), num_points)
+upoints = [u_noisy1[:, i] for i in selected_indices]
+timepoints = [time[i] for i in selected_indices]
+temp=hcat(upoints...)
+dataset = [hcat(temp[i, :], timepoints) for i in 1:2]
+
+discretization_uniform = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
+    dataset = [dataset, nothing])
+discretization_08_gaussian = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
+    dataset = [dataset, nothing])
+discretization_05_gaussian = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
+    dataset = [dataset, nothing])
+
+discretization1 = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
+    dataset = [dataset, nothing])
+
+scatter!(discretization.dataset[1][1][:,2], discretization.dataset[1][1][:,1])
+scatter!(discretization.dataset[1][2][:,2], discretization.dataset[1][2][:,1])
+
+sol = solve(prob, Tsit5(); saveat=0.1)
+odedata = Array(sol) + 0.8 * randn(size(Array(sol)))
+
+@named pde_system = PDESystem(eqs,
+    bcs,
+    domains,
+    [t],
+    [x(t), y(t)],
+    [α, β, γ, δ],
+    defaults = Dict([α =>2, β => 2, γ =>2, δ =>2]))
+
+# creating dictionary for masking equations
+eqs = pde_system.eqs
+Dict_differentials = Dict()
+exps = toexpr.(eqs)
+nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+sol3 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_uniform = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_uniform;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_08_gaussian = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_08_gaussian;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_05_gaussian = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_05_gaussian;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+
+# more iterations for above
+sol3_100_uniform_1000 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_uniform;
+    draw_samples = 1000,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_08_gaussian_1000 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_08_gaussian;
+    draw_samples = 1000,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_05_gaussian_1000 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_05_gaussian;
+    draw_samples = 1000,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+    
+# more iterations for above + strict BC
+sol3_100_uniform_1000_bc = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_uniform;
+    draw_samples = 1000,
+    bcstd = [0.1, 0.1],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_08_gaussian_1000_bc = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_08_gaussian;
+    draw_samples = 1000,
+    bcstd = [0.1, 0.1],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_08_gaussian_1000_bc_hard = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_08_gaussian;
+    draw_samples = 1000,
+    bcstd = [0.05, 0.05],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_05_gaussian_1000_bc = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_05_gaussian;
+    draw_samples = 1000,
+    bcstd = [0.1, 0.1],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_08_gaussian_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_08_gaussian;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol3_100_05_gaussian_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_05_gaussian;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+# time
+# dataset
+# chainl[1](time', sol3.estimated_nn_params[1], st)[1][1,:]
+# plot!(time1, chainl[1](time1', sol3.estimated_nn_params[1], st)[1][1,:])
+# plot!(time1, chainl[2](time1', sol3.estimated_nn_params[2], st)[1][1,:])
+# plot!(time1, chainl[1](time1', sol5.estimated_nn_params[1], st)[1][1,:])
+# plot!(time1, chainl[2](time1', sol5.estimated_nn_params[2], st)[1][1,:])
+# time1 = collect(0.0:(1 / 100.0):8.0)
+
+sol4 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true
+)
+
+sol4_0 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true
+)
+
+
+sol5_00 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol5_0 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.2, 0.2],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol5 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+
+# 70 points in dataset
+sol6 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.2, 0.2],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+# SOL6_1 VS SOL6_L2
+sol6_1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol6_2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol6_2_L2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol6_3 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol6_4 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.2, 0.2],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol6_L2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+    
+sol6_L2_1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol6_L2_2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.05, 0.05],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+# 50 datapoint 0-5 sol5 vs sol4
+# julia> sol4.estimated_de_params
+# 4-element Vector{Particles{Float64, 234}}:
+#  0.549 ± 0.0058
+#  0.71 ± 0.0042
+#  0.408 ± 0.0063
+#  0.355 ± 0.0015
+
+# julia> sol5.estimated_de_params
+# 4-element Vector{Particles{Float64, 234}}:
+#  0.604 ± 0.0052
+#  0.702 ± 0.0034
+#  0.346 ± 0.0037
+#  0.335 ± 0.0013
+
+# 100 datapoint 0-5 sol5_2 vs sol3
+# julia> sol3.estimated_de_params
+# 4-element Vector{Particles{Float64, 234}}:
+#  0.598 ± 0.0037
+#  0.711 ± 0.0027
+#  0.399 ± 0.0032
+#  0.333 ± 0.0011
+
+# julia> sol5_2.estimated_de_params
+# 4-element Vector{Particles{Float64, 234}}:
+#  0.604 ± 0.0035
+#  0.686 ± 0.0026
+#  0.395 ± 0.0029
+#  0.328 ± 0.00095
+
+# timespan for full dataset (0-8)
+sol6 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1, 2),
+        Normal(1, 1),
+        Normal(1, 2),
+        Normal(1, 1)
+    ], progress = true)
+
+sol5_3 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1, 2),
+        Normal(1, 1),
+        Normal(1, 2),
+        Normal(1, 1)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol5_4 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.2, 0.2],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1, 2),
+        Normal(1, 1),
+        Normal(1, 2),
+        Normal(1, 1)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol5_5 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1, 2),
+        Normal(1, 1),
+        Normal(1, 2),
+        Normal(1, 1)
+    ], progress = true
+)
+
+sol7 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1, 2),
+        Normal(1, 1),
+        Normal(1, 2),
+        Normal(1, 1)
+    ], Dict_differentials = Dict_differentials, progress = true)
+
+sol5_5_1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], progress = true
+)
+
+sol7_1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true)
+    
+sol7_2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.1, 0.1],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true)
+
+sol7_3 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.2, 0.2],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true)
+ 
+sol7_4 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true)
+
+lpfun = function f(chain::Chains) # function to compute the logpdf values
+    niter, nparams, nchains = size(chain)
+    lp = zeros(niter + nchains) # resulting logpdf values
+    for i = 1:nparams
+        lp += logpdf(MvNormal(Array(chain[:,i,:])) , dataset[1][:,1]')
+        lp += logpdf(MvNormal(Array(chain[:,i,:])) , dataset[1][:,2]')
+    end
+    return lp
+end
+
+DIC, pD = dic(sol3.original.mcmc_chain, lpfun)
+DIC1, pD1 = dic(sol4.original.mcmc_chain, lpfun)
+
+size(sol3.original.mcmc_chain)
+Array(sol3.original.mcmc_chain[1,:,:])
+length(sol3.estimated_nn_params[1])
+chainl[1](time', sol3.estimated_nn_params[1], st)[1]
+
+data = [hcat(calculate_derivatives2(dataset[i][:, 2], dataset[1][:, 1]),dataset[i][:, 2]) for i in eachindex(dataset)]
+dataset[1][:,1]
+dataset[2]
+plot!(dataset[1][:,2],dataset[1][:,1])
+eqs
+sol5 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 200,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.02, 0.02],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(3, 2),
+        Normal(3, 2)
+        # LogNormal(1, 2),
+        # LogNormal(1, 2),
+        # LogNormal(1, 2),
+        # LogNormal(1, 2)
+    ], progress = true)
+
+# plot(time, chainl[1](time', sol2.estimated_nn_params[1], st)[1])
+# plot!(time, chainl[2](time', sol2.estimated_nn_params[2], st)[1])
+
+sol6 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 200,
+    bcstd = [0.5, 0.5],
+    phystd = [0.5, 0.5], l2std = [0.02, 0.02],
+    priorsNNw = (0.0, 5.0), phystdnew = [0.5, 0.5],
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),aa
+    param = [
+        # LogNormal(2, 3),LogNormal(2, 3),LogNormal(2, 3),LogNormal(2, 3)
+        # Normal(3, 2),
+        # Normal(4, 2),
+        Normal(3, 2),
+        Normal(3, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+function calculate_derivatives2(indvar,depvar)
+    x̂, time = indvar,depvar
+    num_points = length(x̂)
+    # Initialize an array to store the derivative values.
+    derivatives = similar(x̂)
+
+    for i in 2:(num_points - 1)
+        # Calculate the first-order derivative using central differences.
+        Δt_forward = time[i + 1] - time[i]
+        Δt_backward = time[i] - time[i - 1]
+
+        derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
+
+        derivatives[i] = derivative
+    end
+
+    # Derivatives at the endpoints can be calculated using forward or backward differences.
+    derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
+    derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
+    return derivatives
+end
+dataset[1]
+dataset[2]
+dataset[1][:,1]=calculate_derivatives2(dataset[1][:,2], dataset[1][:,1])
+dataset[2][:,1]=calculate_derivatives2(dataset[2][:,2], dataset[2][:,1])
+dataset[1]
+dataset[2]
+sol7 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 200,
+    bcstd = [0.5, 0.5],
+    phystd = [0.5, 0.5], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(0, 2),
+        Normal(0, 2)
+        # LogNormal(1, 2),
+        # LogNormal(1, 2),
+        # LogNormal(1, 2),
+        # LogNormal(1, 2)
+    ], progress = true)
+
+# plot(time, chainl[1](time', sol2.estimated_nn_params[1], st)[1])
+# plot!(time, chainl[2](time', sol2.estimated_nn_params[2], st)[1])
+
+sol8 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), phystdnew = [0.1, 0.1],
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),aa
+    param = [
+        # LogNormal(2, 3),LogNormal(2, 3),LogNormal(2, 3),LogNormal(2, 3)
+        # Normal(3, 2),
+        # Normal(4, 2),
+        Normal(0, 2),
+        Normal(0, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+timepoints = collect(0.0:(1 / 100.0):9.0)
+plot!(timepoints', chainl[1](timepoints', sol5_4.estimated_nn_params[1], st)[1])
+plot!(timepoints, chainl[2](timepoints', sol5_4.estimated_nn_params[2], st)[1])
+
+sol_L2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true
+)
+
+sol_NEW = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+sol_L2_70 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true
+)
+
+sol_NEW_70 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+using Plots, StatsPlots
+plotly()
+
+plot(time, u[1, :])
+plot!(time, u[2, :])
+scatter!(time, u_noisy[1, :])
+scatter!(time, u_noisy[2, :])
+scatter!(discretization.dataset[1][1][:,2], discretization.dataset[1][1][:,1])
+scatter!(discretization.dataset[1][2][:,2], discretization.dataset[1][2][:,1])
+
+scatter!(discretization1.dataset[1][1][:,2], discretization1.dataset[1][1][:,1],legend=nothing)
+scatter!(discretization1.dataset[1][2][:,2], discretization1.dataset[1][2][:,1])
+
+# plot28(sol4 seems better vs sol3 plots, params seems similar)
+plot!(sol3.timepoints[1]', sol3.ensemblesol[1])
+plot!(sol3.timepoints[2]', sol3.ensemblesol[2])
+plot!(sol3_0.timepoints[1]', sol3_0.ensemblesol[1])
+plot!(sol3_0.timepoints[2]', sol3_0.ensemblesol[2])
+
+plot!(sol4.timepoints[1]', sol4.ensemblesol[1])
+plot!(sol4.timepoints[2]', sol4.ensemblesol[2])
+plot!(sol4_0.timepoints[1]', sol4_0.ensemblesol[1])
+plot!(sol4_0.timepoints[2]', sol4_0.ensemblesol[2])
+
+plot!(sol4_2.timepoints[1]', sol4_2.ensemblesol[1],legend=nothing)
+plot!(sol4_2.timepoints[2]', sol4_2.ensemblesol[2])
+plot!(sol5_2.timepoints[1]', sol5_2.ensemblesol[1],legend=nothing)
+plot!(sol5_2.timepoints[2]', sol5_2.ensemblesol[2])
+
+plot!(sol4_3.timepoints[1]', sol4_3.ensemblesol[1],legend=nothing)
+plot!(sol4_3.timepoints[2]', sol4_3.ensemblesol[2])
+plot!(sol5_3.timepoints[1]', sol5_3.ensemblesol[1])
+plot!(sol5_3.timepoints[2]', sol5_3.ensemblesol[2])
+plot!(sol5_4.timepoints[1]', sol5_4.ensemblesol[1],legend=nothing)
+plot!(sol5_4.timepoints[2]', sol5_4.ensemblesol[2])
+
+
+# plot 36 sol4 vs sol5(params sol4 better, but plots sol5 "looks" better),plot 44(sol5 better than sol6 overall)
+plot!(sol5.timepoints[1]', sol5.ensemblesol[1],legend=nothing)
+plot!(sol5.timepoints[2]', sol5.ensemblesol[2])
+
+plot!(sol5_0.timepoints[1]', sol5_0.ensemblesol[1],legend=nothing)
+plot!(sol5_0.timepoints[2]', sol5_0.ensemblesol[2])
+
+plot!(sol5_00.timepoints[1]', sol5_00.ensemblesol[1],legend=nothing)
+plot!(sol5_00.timepoints[2]', sol5_00.ensemblesol[2])
+
+plot!(sol6.timepoints[1]', sol6.ensemblesol[1])
+plot!(sol6.timepoints[2]', sol6.ensemblesol[2])
+plot!(sol6_L2.timepoints[1]', sol6_L2.ensemblesol[1])
+plot!(sol6_L2.timepoints[2]', sol6_L2.ensemblesol[2])
+
+plot!(sol6_L2_1.timepoints[1]', sol6_L2_1.ensemblesol[1])
+plot!(sol6_L2_1.timepoints[2]', sol6_L2_1.ensemblesol[2])
+
+plot!(sol6_L2_2.timepoints[1]', sol6_L2_2.ensemblesol[1])
+plot!(sol6_L2_2.timepoints[2]', sol6_L2_2.ensemblesol[2])
+
+plot!(sol6_1.timepoints[1]', sol6_1.ensemblesol[1])
+plot!(sol6_1.timepoints[2]', sol6_1.ensemblesol[2])
+plot!(sol6_2.timepoints[1]', sol6_2.ensemblesol[1])
+plot!(sol6_2.timepoints[2]', sol6_2.ensemblesol[2],legend=nothing)
+plot!(sol6_2_L2.timepoints[1]', sol6_2_L2.ensemblesol[1])
+plot!(sol6_2_L2.timepoints[2]', sol6_2_L2.ensemblesol[2],legend=nothing)
+
+# plot52 sol7 vs sol5(sol5 overall better plots, params?)
+plot!(sol7.timepoints[1]', sol7.ensemblesol[1])
+plot!(sol7.timepoints[2]', sol7.ensemblesol[2])
+
+# sol8,sol8_2,sol9,sol9_2 bad
+plot!(sol8.timepoints[1]', sol8.ensemblesol[1])
+plot!(sol8.timepoints[2]', sol8.ensemblesol[2])
+plot!(sol8_2.timepoints[1]', sol8_2.ensemblesol[1])
+plot!(sol8_2.timepoints[2]', sol8_2.ensemblesol[2])
+
+plot!(sol9.timepoints[1]', sol9.ensemblesol[1])
+plot!(sol9.timepoints[2]', sol9.ensemblesol[2])
+plot!(sol9_2.timepoints[1]', sol9_2.ensemblesol[1])
+plot!(sol9_2.timepoints[2]', sol9_2.ensemblesol[2])
+
+
+plot!(sol5_5.timepoints[1]', sol5_5.ensemblesol[1])
+plot!(sol5_5.timepoints[2]', sol5_5.ensemblesol[2],legend=nothing)
+
+plot!(sol5_5_1.timepoints[1]', sol5_5_1.ensemblesol[1])
+plot!(sol5_5_1.timepoints[2]', sol5_5_1.ensemblesol[2],legend=nothing)
+plot!(sol7_1.timepoints[1]', sol7_1.ensemblesol[1])
+plot!(sol7_1.timepoints[2]', sol7_1.ensemblesol[2])
+
+plot!(sol7_4.timepoints[1]', sol7_4.ensemblesol[1])
+plot!(sol7_4.timepoints[2]', sol7_4.ensemblesol[2])
+
+plot!(sol5_2_1.timepoints[1]', sol5_2_1.ensemblesol[1],legend=nothing)
+plot!(sol5_2_1.timepoints[2]', sol5_2_1.ensemblesol[2])
+plot!(sol5_2_2.timepoints[1]', sol5_2_2.ensemblesol[1],legend=nothing)
+plot!(sol5_2_2.timepoints[2]', sol5_2_2.ensemblesol[2])
+
+plot!(sol5_0.timepoints[1]', sol5_0.ensemblesol[1])
+plot!(sol5_0.timepoints[2]', sol5_0.ensemblesol[2],legend=nothing)
+
+plot!(sol5_00.timepoints[1]', sol5_00.ensemblesol[1],legend=nothing)
+plot!(sol5_00.timepoints[2]', sol5_00.ensemblesol[2])
+
+plot!(sol3_0.timepoints[1]', sol3_0.ensemblesol[1])
+plot!(sol3_0.timepoints[2]', sol3_0.ensemblesol[2],legend=nothing)
+plot!(sol4_0.timepoints[1]', sol4_0.ensemblesol[1])
+plot!(sol4_0.timepoints[2]', sol4_0.ensemblesol[2],legend=nothing)
+
+plot!(sol3_100_05_gaussian.timepoints[1]', sol3_100_05_gaussian.ensemblesol[1])
+plot!(sol3_100_05_gaussian.timepoints[2]', sol3_100_05_gaussian.ensemblesol[2],legend=nothing)
+
+plot!(sol3_100_05_gaussian_new.timepoints[1]', sol3_100_05_gaussian_new.ensemblesol[1])
+plot!(sol3_100_05_gaussian_new.timepoints[2]', sol3_100_05_gaussian_new.ensemblesol[2])
+
+plot!(sol3_100_08_gaussian.timepoints[1]', sol3_100_08_gaussian.ensemblesol[1])
+plot!(sol3_100_08_gaussian.timepoints[2]', sol3_100_08_gaussian.ensemblesol[2])
+
+plot!(sol3_100_08_gaussian_new.timepoints[1]', sol3_100_08_gaussian_new.ensemblesol[1])
+plot!(sol3_100_08_gaussian_new.timepoints[2]', sol3_100_08_gaussian_new.ensemblesol[2],legend=nothing)
+
+plot!(sol3_100_uniform.timepoints[1]', sol3_100_uniform.ensemblesol[1])
+plot!(sol3_100_uniform.timepoints[2]', sol3_100_uniform.ensemblesol[2])
+
+plot!(sol3_100_08_gaussian_1000.timepoints[1]', sol3_100_08_gaussian_1000.ensemblesol[1])
+plot!(sol3_100_08_gaussian_1000.timepoints[2]', sol3_100_08_gaussian_1000.ensemblesol[2])
+
+plot!(sol3_100_05_gaussian_1000.timepoints[1]', sol3_100_05_gaussian_1000.ensemblesol[1])
+plot!(sol3_100_05_gaussian_1000.timepoints[2]', sol3_100_05_gaussian_1000.ensemblesol[2])
+
+plot!(sol3_100_uniform_1000.timepoints[1]', sol3_100_uniform_1000.ensemblesol[1])
+plot!(sol3_100_uniform_1000.timepoints[2]', sol3_100_uniform_1000.ensemblesol[2])
+
+plot!(sol3_100_08_gaussian_1000_bc.timepoints[1]', sol3_100_08_gaussian_1000_bc.ensemblesol[1])
+plot!(sol3_100_08_gaussian_1000_bc.timepoints[2]', sol3_100_08_gaussian_1000_bc.ensemblesol[2])
+
+# test with lower number of points
+# test same calls 2 times or more
+# consider full range dataset case
+# combination of all above
+
+# run 1 100 iters
+sol5.estimated_de_params
+sol6.estimated_de_params
+
+# run 2 200 iters
+sol5.estimated_de_params
+sol6.estimated_de_params
+
+# run 2 200 iters
+sol3.estimated_de_params
+sol4.estimated_de_params
+
+# p = [2/3, 2/3, 1/3, 1/3]
+sol3.estimated_de_params
+sol4.estimated_de_params
+dataset[1]
+eqs
+α, β, γ, δ = p
+p
+#  1.0
+#  0.6666666666666666
+#  1.0
+#  0.33333333333333333
+
+1/a
+1/c
+eqs
+using StatsPlots
+plotly()
+plot(sol3.original.mcmc_chain)
+plot(sol5_00.original.mcmc_chain)
+
+# 4-element Vector{Particles{Float64, 34}}:
+#  1.23 ± 0.022
+#  0.858 ± 0.011
+#  3.04 ± 0.079
+#  1.03 ± 0.024
+# 4-element Vector{Particles{Float64, 34}}:
+#  1.2 ± 0.0069
+#  0.835 ± 0.006
+#  3.22 ± 0.01
+#  1.08 ± 0.0053
+# # plot(time', chainl[1](time', sol1.estimated_nn_params[1], st)[1])
+# # plot!(time, chainl[2](time', sol1.estimated_nn_params[2], st)[1])
+
+# sol3 = ahmc_bayesian_pinn_pde(pde_system,
+#     discretization;
+#     draw_samples = 500,
+#     bcstd = [0.05, 0.05],
+#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
+#     phystdnew = [0.5, 0.5],
+#     #  Kernel = AdvancedHMC.NUTS(0.8),
+#     priorsNNw = (0.0, 10.0),
+#     saveats = [1 / 50.0],
+#     param = [
+#         Normal(0.0, 2),
+#         Normal(0.0, 2),
+#         Normal(0.0, 2),
+#         Normal(0.0, 2)
+#     ],
+#     Dict_differentials = Dict_differentials, progress = true)
+
+# sol = ahmc_bayesian_pinn_pde(pde_system,
+#     discretization;
+#     draw_samples = 500,
+#     bcstd = [0.05, 0.05],
+#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
+#     priorsNNw = (0.0, 10.0),
+#     saveats = [1 / 50.0],
+#     # Kernel = AdvancedHMC.NUTS(0.8),
+#     param = [
+#         Normal(1.0, 2),
+#         Normal(1.0, 2),
+#         Normal(1.0, 2),
+#         Normal(1.0, 2)
+#     ], progress = true)
+
+# plot!(sol.timepoints[1]', sol.ensemblesol[1])
+# plot!(sol.timepoints[2]', sol.ensemblesol[2])
+
+# sol1 = ahmc_bayesian_pinn_pde(pde_system,
+#     discretization;
+#     draw_samples = 500,
+#     bcstd = [0.05, 0.05],
+#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
+#     phystdnew = [0.5, 0.5],
+#     #  Kernel = AdvancedHMC.NUTS(0.8),
+#     priorsNNw = (0.0, 10.0),
+#     saveats = [1 / 50.0],
+#     param = [
+#         Normal(1.0, 2),
+#         Normal(1.0, 2),
+#         Normal(1.0, 2),
+#         Normal(1.0, 2)
+#     ],
+#     Dict_differentials = Dict_differentials, progress = true)
+
+# plot!(sol1.timepoints[1]', sol1.ensemblesol[1])
+# plot!(sol1.timepoints[2]', sol1.ensemblesol[2])
+
+sol = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 500,
+    bcstd = [0.05, 0.05],
+    phystd = [0.005, 0.005], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 10.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(1.0, 2),
+        Normal(1.0, 2),
+        Normal(1.0, 2),
+        Normal(1.0, 2)
+    ])
+
+# plot!(sol.timepoints[1]', sol.ensemblesol[1])
+# plot!(sol.timepoints[2]', sol.ensemblesol[2])
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 500,
+    bcstd = [0.05, 0.05],
+    phystd = [0.005, 0.005], l2std = [0.1, 0.1],
+    phystdnew = [0.5, 0.5],
+    #  Kernel = AdvancedHMC.NUTS(0.8),
+    priorsNNw = (0.0, 10.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1.0, 2),
+        Normal(1.0, 2),
+        Normal(1.0, 2),
+        Normal(1.0, 2)
+    ],
+    Dict_differentials = Dict_differentials)
+
+param = 2 * π
+ts = vec(sol1.timepoints[1])
+u_real = [analytic_sol_func1(0.0, t) for t in ts]
+u_predict = pmean(sol1.ensemblesol[1])
+
+@test u_predict≈u_real atol=1.5
+@test mean(u_predict .- u_real) < 0.1
+@test sol1.estimated_de_params[1]≈param atol=param * 0.3
+
+# points1 = []
+# for eq_arg in eq_args
+#     a = []
+#     # for each (depvar,[indvar1..]) if indvari==indvar (eq_arg)
+#     for i in eachindex(symbols_input)
+#         if symbols_input[i][2] == eq_arg
+#             # include domain points of that depvar
+#             # each loss equation take domain matrix [points..;points..]
+#             push!(a, train_sets[i][:, 2:end]')
+#         end
+#     end
+#     # vcat as new row for next equation
+#     push!(points1, vcat(a...))
+# end
+# println(points1 == points)
+
+# using NeuralPDE, Flux, Lux, ModelingToolkit, LinearAlgebra, AdvancedHMC
+# import ModelingToolkit: Interval, infimum, supremum, Distributions
+# using Plots, MonteCarloMeasurements
+
+# @parameters x, t, α
+# @variables u(..)
+# Dt = Differential(t)
+# Dx = Differential(x)
+# Dx2 = Differential(x)^2
+# Dx3 = Differential(x)^3
+# Dx4 = Differential(x)^4
+
+# # α = 1
+# β = 4
+# γ = 1
+# eq = Dt(u(x, t)) + u(x, t) * Dx(u(x, t)) + α * Dx2(u(x, t)) + β * Dx3(u(x, t)) + γ * Dx4(u(x, t)) ~ 0
+
+# u_analytic(x, t; z = -x / 2 + t) = 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
+# du(x, t; z = -x / 2 + t) = 15 / 2 * (tanh(z) + 1) * (3 * tanh(z) - 1) * sech(z)^2
+
+# bcs = [u(x, 0) ~ u_analytic(x, 0),
+#     u(-10, t) ~ u_analytic(-10, t),
+#     u(10, t) ~ u_analytic(10, t),
+#     Dx(u(-10, t)) ~ du(-10, t),
+#     Dx(u(10, t)) ~ du(10, t)]
+
+# # Space and time domains
+# domains = [x ∈ Interval(-10.0, 10.0),
+#     t ∈ Interval(0.0, 1.0)]
+
+# # Discretization
+# dx = 0.4;
+# dt = 0.2;
+
+# # Function to compute analytical solution at a specific point (x, t)
+# function u_analytic_point(x, t)
+#     z = -x / 2 + t
+#     return 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
+# end
+
+# # Function to generate the dataset matrix
+# function generate_dataset_matrix(domains, dx, dt)
+#     x_values = -10:dx:10
+#     t_values = 0.0:dt:1.0
+
+#     dataset = []
+
+#     for t in t_values
+#         for x in x_values
+#             u_value = u_analytic_point(x, t)
+#             push!(dataset, [u_value, x, t])
+#         end
+#     end
+
+#     return vcat([data' for data in dataset]...)
+# end
+
+# datasetpde = [generate_dataset_matrix(domains, dx, dt)]
+
+# # noise to dataset
+# noisydataset = deepcopy(datasetpde)
+# noisydataset[1][:, 1] = noisydataset[1][:, 1] .+
+#                         randn(size(noisydataset[1][:, 1])) .* 5 / 100 .*
+#                         noisydataset[1][:, 1]
+
+# # plot(datasetpde[1][:, 2], datasetpde[1][:, 1], title = "Dataset from Analytical Solution")
+# # plot!(noisydataset[1][:, 2], noisydataset[1][:, 1])
+
+# # Neural network
+# chain = Lux.Chain(Lux.Dense(2, 8, Lux.tanh),
+#     Lux.Dense(8, 8, Lux.tanh),
+#     Lux.Dense(8, 1))
+
+# discretization = NeuralPDE.BayesianPINN([chain],
+#     GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset, nothing])
+
+# @named pde_system = PDESystem(eq,
+#     bcs,
+#     domains,
+#     [x, t],
+#     [u(x, t)],
+#     [α],
+#     defaults = Dict([α => 0.5]))
+
+# sol1 = ahmc_bayesian_pinn_pde(pde_system,
+#     discretization;
+#     draw_samples = 100,
+#     bcstd = [0.2, 0.2, 0.2, 0.2, 0.2],
+#     phystd = [1.0], l2std = [0.05], param = [Distributions.LogNormal(0.5, 2)],
+#     priorsNNw = (0.0, 10.0),
+#     saveats = [1 / 100.0, 1 / 100.0], progress = true)
+
+# eqs = pde_system.eqs
+# Dict_differentials = Dict()
+# exps = toexpr.(eqs)
+# nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+# sol2 = ahmc_bayesian_pinn_pde(pde_system,
+#     discretization;
+#     draw_samples = 100,
+#     bcstd = [0.2, 0.2, 0.2, 0.2, 0.2],
+#     phystd = [1.0], phystdnew = [0.05], l2std = [0.05],
+#     param = [Distributions.LogNormal(0.5, 2)],
+#     priorsNNw = (0.0, 10.0),
+#     saveats = [1 / 100.0, 1 / 100.0], Dict_differentials = Dict_differentials,
+#     progress = true)
+
+# phi = discretization.phi[1]
+# xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+#           for (d, dx) in zip(domains, [dx / 10, dt])]
+# u_predict = [[first(pmean(phi([x, t], sol1.estimated_nn_params[1]))) for x in xs]
+#              for t in ts]
+# u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+# diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1.estimated_nn_params[1]))))
+#            for x in xs]
+#           for t in ts]
+
+# # p1 = plot(xs, u_predict, title = "predict")
+# # p2 = plot(xs, u_real, title = "analytic")
+# # p3 = plot(xs, diff_u, title = "error")
+# # plot(p1, p2, p3)
+
+# phi = discretization.phi[1]
+# xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+#           for (d, dx) in zip(domains, [dx / 10, dt])]
+# u_predict = [[first(pmean(phi([x, t], sol2.estimated_nn_params[1]))) for x in xs]
+#              for t in ts]
+# u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+# diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol2.estimated_nn_params[1]))))
+#            for x in xs]
+#           for t in ts]
+
+# # p1 = plot(xs, u_predict, title = "predict")
+# # p2 = plot(xs, u_real, title = "analytic")
+# # p3 = plot(xs, diff_u, title = "error")
+# # plot(p1, p2, p3)
+
+@parameters t, p
+@variables u(..)
+
+Dt = Differential(t)
+eqs = Dt(u(t)) - cos(p * t) ~ 0
+bcs = [u(0) ~ 0.0]
+domains = [t ∈ Interval(0.0, 2.0)]
+
+chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
+initl, st = Lux.setup(Random.default_rng(), chainl)
+
+@named pde_system = PDESystem(eqs,
+    bcs,
+    domains,
+    [t],
+    [u(t)],
+    [p],
+    defaults = Dict([p => 4.0]))
+
+analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
+timepoints = collect(0.0:(1 / 100.0):2.0)
+u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
+u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
+dataset = [hcat(u1, timepoints)]
+
+discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
+    dataset = [dataset, nothing])
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 1500,
+    bcstd = [0.05],
+    phystd = [0.01], l2std = [0.01],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 50.0],
+    param = [LogNormal(4.0, 2)], progress = true)
+
+param = 2 * π
+ts = vec(sol1.timepoints[1])
+u_real = [analytic_sol_func1(0.0, t) for t in ts]
+u_predict = pmean(sol1.ensemblesol[1])
+
+@test u_predict≈u_real atol=0.1
+@test mean(u_predict .- u_real) < 0.01
+@test sol1.estimated_de_params[1]≈param atol=0.1
+sol1.estimated_de_params[1]
+
+eqs = pde_system.eqs
+Dict_differentials = Dict()
+exps = toexpr.(eqs)
+nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+sol2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 1500,
+    bcstd = [0.05],
+    phystd = [0.01], l2std = [0.02], phystdnew = [0.02],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 50.0],
+    param = [LogNormal(4.0, 2)],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+param = 2 * π
+ts_2 = vec(sol2.timepoints[1])
+u_real_2 = [analytic_sol_func1(0.0, t) for t in ts]
+u_predict_2 = pmean(sol2.ensemblesol[1])
+
+@test u_predict_2≈u_real_2 atol=0.1
+@test mean(u_predict_2 .- u_real_2) < 0.01
+@test sol2.estimated_de_params[1]≈param atol=0.1
+sol2.estimated_de_params[1]
+
+plot(ts_2, u_predict_2)
+plot!(ts_2, u_real_2)
+
+@parameters t, σ_
+@variables x(..), y(..), z(..)
+Dt = Differential(t)
+eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
+    Dt(y(t)) ~ x(t) * (28.0 - z(t)) - y(t),
+    Dt(z(t)) ~ x(t) * y(t) - 8 / 3 * z(t)]
+
+bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
+domains = [t ∈ Interval(0.0, 1.0)]
+
+input_ = length(domains)
+n = 7
+chain = [
+    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+        Lux.Dense(n, 1)),
+    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+        Lux.Dense(n, 1)),
+    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+        Lux.Dense(n, 1))
+]
+
+#Generate Data
+function lorenz!(du, u, p, t)
+    du[1] = 10.0 * (u[2] - u[1])
+    du[2] = u[1] * (28.0 - u[3]) - u[2]
+    du[3] = u[1] * u[2] - (8 / 3) * u[3]
+end
+
+u0 = [1.0; 0.0; 0.0]
+tspan = (0.0, 1.0)
+prob = ODEProblem(lorenz!, u0, tspan)
+sol = solve(prob, Tsit5(), dt = 0.01, saveat = 0.05)
+ts = sol.t
+us = hcat(sol.u...)
+us = us .+ ((0.05 .* randn(size(us))) .* us)
+ts_ = hcat(sol(ts).t...)[1, :]
+dataset = [hcat(us[i, :], ts_) for i in 1:3]
+
+discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true,
+    dataset = [dataset, nothing])
+
+@named pde_system = PDESystem(eqs, bcs, domains,
+    [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]]))
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 100,
+    bcstd = [0.3, 0.3, 0.3],
+    phystd = [0.1, 0.1, 0.1],
+    l2std = [1, 1, 1],
+    priorsNNw = (0.0, 1.0),
+    saveats = [0.01],
+    param = [Normal(14.0, 2)], progress = true)
+
+idealp = 10.0
+p_ = sol1.estimated_de_params[1]
+@test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
+# @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
+
+@parameters x y
+@variables u(..)
+Dxx = Differential(x)^2
+Dyy = Differential(y)^2
+
+# 2D PDE
+eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y)
+
+# Boundary conditions
+bcs = [u(0, y) ~ 0.0, u(1, y) ~ 0.0,
+    u(x, 0) ~ 0.0, u(x, 1) ~ 0.0]
+
+# Space and time domains
+domains = [x ∈ Interval(0.0, 1.0),
+    y ∈ Interval(0.0, 1.0)]
+
+# Neural network
+dim = 2 # number of dimensions
+chain = Lux.Chain(Lux.Dense(dim, 9, Lux.σ), Lux.Dense(9, 9, Lux.σ), Lux.Dense(9, 1))
+
+# Discretization
+dx = 0.04
+discretization = BayesianPINN([chain], GridTraining(dx), dataset = [[dataset], nothing])
+
+@named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
+
+eqs = pde_system.eqs
+Dict_differentials = Dict()
+exps = toexpr.(eqs)
+nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 5,
+    bcstd = [0.01, 0.01, 0.01, 0.01],
+    phystd = [0.005],
+    priorsNNw = (0.0, 2.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+xs = sol1.timepoints[1]
+sol1.ensemblesol[1]
+analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2)
+
+dataset = hcat(u_real, xs')
+u_predict = pmean(sol1.ensemblesol[1])
+u_real = [analytic_sol_func(xs[:, i][1], xs[:, i][2]) for i in 1:length(xs[1, :])]
+@test u_predict≈u_real atol=0.8
+
+using NeuralPDE, Lux, ModelingToolkit, LinearAlgebra, AdvancedHMC
+import ModelingToolkit: Interval, infimum, supremum, Distributions
+using Plots, MonteCarloMeasurements
+
+@parameters x t
+@variables u(..)
+
+Dt = Differential(t)
+Dx = Differential(x)
+Dxx = Dx^2
+α = 0.05
+# Burger's equation
+eq = Dt(u(t, x)) + u(t, x) * Dx(u(t, x)) - α * Dxx(u(t, x)) ~ 0
+
+# boundary conditions
+bcs = [
+    u(0.0, x) ~ -sin(π * x),
+    u(t, -1.0) ~ 0.0,
+    u(t, 1.0) ~ 0.0
+]
+
+domains = [t ∈ Interval(0.0, 1.0), x ∈ Interval(-1.0, 1.0)]
+
+# Neural network
+chain = Lux.Chain(Dense(2, 10, Lux.σ), Dense(10, 10, Lux.σ), Dense(10, 1))
+strategy = NeuralPDE.QuadratureTraining(; abstol = 1e-6, reltol = 1e-6, batch = 200)
+
+indvars = [t, x]
+depvars = [u(t, x)]
+@named pde_system = PDESystem(eq, bcs, domains, indvars, depvars)
+
+# KS EQUATION
+using NeuralPDE, Flux, Lux, ModelingToolkit, LinearAlgebra, AdvancedHMC
+import ModelingToolkit: Interval, infimum, supremum, Distributions
+using Plots, MonteCarloMeasurements, StatsPlots
+
+@parameters x, t, α
+@variables u(..)
+Dt = Differential(t)
+Dx = Differential(x)
+Dx2 = Differential(x)^2
+Dx3 = Differential(x)^3
+Dx4 = Differential(x)^4
+
+# α = 1
+β = 4
+γ = 1
+eq = Dt(u(x, t)) + u(x, t) * Dx(u(x, t)) + α * Dx2(u(x, t)) + β * Dx3(u(x, t)) + γ * Dx4(u(x, t)) ~ 0
+
+u_analytic(x, t; z = -x / 2 + t) = 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
+du(x, t; z = -x / 2 + t) = 15 / 2 * (tanh(z) + 1) * (3 * tanh(z) - 1) * sech(z)^2
+
+bcs = [u(x, 0) ~ u_analytic(x, 0),
+    u(-10, t) ~ u_analytic(-10, t),
+    u(10, t) ~ u_analytic(10, t),
+    Dx(u(-10, t)) ~ du(-10, t),
+    Dx(u(10, t)) ~ du(10, t)]
+
+# Space and time domains
+domains = [x ∈ Interval(-10.0, 10.0),
+    t ∈ Interval(0.0, 1.0)]
+
+# Discretization
+dx = 0.4;
+dt = 0.2;
+
+# Function to compute analytical solution at a specific point (x, t)
+function u_analytic_point(x, t)
+    z = -x / 2 + t
+    return 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
+end
+
+# Function to generate the dataset matrix
+function generate_dataset_matrix(domains, dx, dt, xlim, tlim)
+    x_values = xlim[1]:dx:xlim[2]
+    t_values = tlim[1]:dt:tlim[2]
+
+    dataset = []
+
+    for t in t_values
+        for x in x_values
+            u_value = u_analytic_point(x, t)
+            push!(dataset, [u_value, x, t])
+        end
+    end
+
+    return vcat([data' for data in dataset]...)
+end
+
+#   x_values = -10:dx:10
+#     t_values = 0.0:dt:1.0
+
+#     dataset = []
+
+#     for t in t_values
+#         for x in x_values
+#             u_value = u_analytic_point(x, t)
+#             push!(dataset, [u_value, x, t])
+#         end
+#     end
+# dataset
+#    pop= vcat([data' for data in dataset]...)
+
+datasetpde = [generate_dataset_matrix(domains, dx, dt, [-10,10], [0.0,1.0])]
+
+datasetpde_new = [generate_dataset_matrix(domains, dx, dt, [-10,0], [0.0,1.0])]
+
+# noise to dataset
+noisydataset = deepcopy(datasetpde)
+noisydataset[1][:, 1] = noisydataset[1][:, 1] .+ (randn(size(noisydataset[1][:, 1])) .* 0.8)
+
+noisydataset_new = deepcopy(datasetpde_new)
+noisydataset_new[1][:, 1] = noisydataset_new[1][:, 1] .+ (randn(size(noisydataset_new[1][:, 1])) .* 0.8)
+
+# a=discretization_new.dataset[1]
+
+plotly()
+plot(datasetpde[1][:, 2], datasetpde[1][:, 1], title = "Dataset from Analytical Solution")
+# scatter!(a[1][:, 2], a[1][:, 1])
+scatter!(noisydataset[1][:, 2], noisydataset[1][:, 1])
+
+plot(datasetpde[1][:, 2],datasetpde[1][:, 3], datasetpde[1][:, 1], title = "Dataset from Analytical Solution")
+scatter!(noisydataset[1][:, 2],noisydataset[1][:, 3], noisydataset[1][:, 1])
+
+plotly()
+plot(datasetpde_new[1][:, 2], datasetpde_new[1][:, 1], title = "Dataset from Analytical Solution")
+scatter!(noisydataset_new[1][:, 2], noisydataset_new[1][:, 1])
+
+plot(datasetpde_new[1][:, 2],datasetpde_new[1][:, 3], datasetpde_new[1][:, 1], title = "Dataset from Analytical Solution")
+scatter!(noisydataset_new[1][:, 2],noisydataset_new[1][:, 3], noisydataset_new[1][:, 1])
+
+noise_std = 1.4
+original_data = datasetpde[1][:, 1]
+original_std = std(original_data)
+ratio = noise_std / original_std
+
+
+using StatsPlots
+plot(sol1.original.mcmc_chain)
+plot(sol2.original.mcmc_chain)
+
+plot(sol0_new.original.mcmc_chain)
+plot(sol2_new.original.mcmc_chain)
+
+# Neural network
+chain = Lux.Chain(Lux.Dense(2, 8, Lux.tanh),
+    Lux.Dense(8, 8, Lux.tanh),
+    Lux.Dense(8, 1))
+
+chain_more = Lux.Chain(Lux.Dense(2, 10, Lux.tanh),
+    Lux.Dense(10, 10, Lux.tanh),
+    Lux.Dense(10, 1))
+# chain = Lux.Chain(Lux.Dense(2, 8, Lux.σ),
+#     Lux.Dense(8, 8, Lux.σ),
+#     Lux.Dense(8, 1))
+
+discretization = NeuralPDE.BayesianPINN([chain],
+    GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset, nothing])
+
+discretization_more = NeuralPDE.BayesianPINN([chain_more],
+    GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset, nothing])
+
+discretization_new = NeuralPDE.BayesianPINN([chain],
+    GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset_new, nothing])
+
+
+@named pde_system = PDESystem(eq,
+    bcs,
+    domains,
+    [x, t],
+    [u(x, t)],
+    [α],
+    defaults = Dict([α => 2.0]))
+
+eqs = pde_system.eqs
+Dict_differentials = Dict()
+exps = toexpr.(eqs)
+nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+Dict_differentials
+
+plot(sol1.original.mcmc_chain)
+meanplot(sol1.original.mcmc_chain)
+autocorplot(sol1.original.mcmc_chain)
+traceplot(sol1.original.mcmc_chain)
+
+plot(sol2.original.mcmc_chain)
+meanplot(sol2.original.mcmc_chain)
+autocorplot(sol2.original.mcmc_chain)
+traceplot(sol2.original.mcmc_chain)
+
+plot(sol0_new.original.mcmc_chain)
+meanplot(sol0_new.original.mcmc_chain)
+autocorplot(sol0_new.original.mcmc_chain)
+
+plot(sol2_new.original.mcmc_chain)
+meanplot(sol2_new.original.mcmc_chain)
+autocorplot(sol2_new.original.mcmc_chain)
+
+plot(sol3_new.original.mcmc_chain)
+meanplot(sol3_new.original.mcmc_chain)
+autocorplot(sol3_new.original.mcmc_chain)
+
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 80, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.5],
+    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+sol1_more = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_more;
+    draw_samples = 80, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.5],
+    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+sol1_1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 90, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew =  [0.7],
+    phystd = [0.7], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+sol2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 80, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    progress = true)
+
+sol2_more = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_more;
+    draw_samples = 80, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    progress = true)
+
+# julia> sol2 = ahmc_bayesian_pinn_pde(pde_system,
+#            discretization;
+#            draw_samples = 85, Kernel = AdvancedHMC.NUTS(0.8),
+#            bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+#            phystd = [0.7], l2std = [0.2], param = [Distributions.Normal(2.0,
+#            priorsNNw = (0.0, 3.0),
+#            saveats = [1 / 100.0, 1 / 100.0],
+#            progress = true)
+# ┌ Info: Current Physics Log-likelihood : 
+# └   ℓπ.full_loglikelihood(setparameters(ℓπ, initial_θ), ℓπ.allstd) = -415167
+# ┌ Info: Current Prior Log-likelihood : 
+# └   priorlogpdf(ℓπ, initial_θ) = -214.1825373360679
+# ┌ Info: Current MSE against dataset Log-likelihood :
+# └   L2LossData(ℓπ, initial_θ) = -109309.44973223892
+# Sampling 100%|███████████████████████████████| Time: 0:14:50
+#   iterations:                                   85
+#   ratio_divergent_transitions:                  0.0
+#   ratio_divergent_transitions_during_adaption:  0.02
+#   n_steps:                                      127
+#   is_accept:                                    true
+#   acceptance_rate:                              0.9982795867682919
+#   log_density:                                  -3832.934953640867
+#   hamiltonian_energy:                           4145.005901868316
+#   hamiltonian_energy_error:                     -0.07863051782624098
+#   max_hamiltonian_energy_error:                 -0.16790754244266282
+#   tree_depth:                                   7
+#   numerical_error:                              false
+#   step_size:                                    0.00018186972987192408
+#   nom_step_size:                                0.00018186972987192408
+#   is_adapt:                                     false
+#   mass_matrix:                                  DiagEuclideanMetric([1.0, 1.
+# [ Info: Sampling Complete.
+# ┌ Info: Current Physics Log-likelihood :
+# └   ℓπ.full_loglikelihood(setparameters(ℓπ, samples[end]), ℓπ.allstd) = -132
+# ┌ Info: Current Prior Log-likelihood :
+# └   priorlogpdf(ℓπ, samples[end]) = -219.17544656823006
+# ┌ Info: Current MSE against dataset Log-likelihood :
+# └   L2LossData(ℓπ, samples[end]) = -3481.509412470054
+
+# julia> sol1 = ahmc_bayesian_pinn_pde(pde_system,
+#            discretization;
+#            draw_samples = 90, Kernel = AdvancedHMC.NUTS(0.8),
+#            bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.7],
+#            phystd = [0.7], l2std = [0.2], param = [Distributions.Normal(2.0,
+#            priorsNNw = (0.0, 3.0),
+#            saveats = [1 / 100.0, 1 / 100.0],
+#            Dict_differentials = Dict_differentials,
+#            progress = true)
+# ┌ Info: Current Physics Log-likelihood : 
+# └   ℓπ.full_loglikelihood(setparameters(ℓπ, initial_θ), ℓπ.allstd) = -394622
+# ┌ Info: Current Prior Log-likelihood : 
+# └   priorlogpdf(ℓπ, initial_θ) = -214.1657203956881
+# ┌ Info: Current MSE against dataset Log-likelihood :
+# └   L2LossData(ℓπ, initial_θ) = -107600.2750860966
+# ┌ Info: Current L2_LOSSY : 
+# └   ℓπ.L2_loss2(setparameters(ℓπ, initial_θ), ℓπ.allstd) = -352.339686469935
+# Sampling 100%|███████████████████████████████| Time: 0:38:42
+#   iterations:                                   90
+#   ratio_divergent_transitions:                  0.24
+#   ratio_divergent_transitions_during_adaption:  0.02
+#   n_steps:                                      34
+#   is_accept:                                    true
+#   acceptance_rate:                              0.0755469536430885
+#   log_density:                                  -6535.135018473582
+#   hamiltonian_energy:                           6681.540376258076
+#   hamiltonian_energy_error:                     -1.7097735125544204
+#   max_hamiltonian_energy_error:                 1216.239238705054
+#   tree_depth:                                   5
+#   numerical_error:                              true
+#   step_size:                                    0.0004111092751764056
+#   nom_step_size:                                0.0004111092751764056
+#   is_adapt:                                     false
+#   mass_matrix:                                  DiagEuclideanMetric([1.0, 1.
+# [ Info: Sampling Complete.
+# ┌ Info: Current Physics Log-likelihood : 
+# └   ℓπ.full_loglikelihood(setparameters(ℓπ, samples[end]), ℓπ.allstd) = -272
+# ┌ Info: Current Prior Log-likelihood : 
+# └   priorlogpdf(ℓπ, samples[end]) = -218.6535874132563
+# ┌ Info: Current MSE against dataset Log-likelihood : 
+# └   L2LossData(ℓπ, samples[end]) = -3573.449092586736
+# ┌ Info: Current L2_LOSSY : 
+# └   ℓπ.L2_loss2(setparameters(ℓπ, samples[end]), ℓπ.allstd) = -2470.35523478
+
+using MCMCChains
+println(summarize(sol1.original.mcmc_chain))
+plot(sol1.original.mcmc_chain)
+
+sol3 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 100, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+    phystd = [0.7], l2std = [0.15], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 3.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    progress = true)
+
+sol4 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 80, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+    phystd = [0.7], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    progress = true)
+
+
+phi = discretization.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol1.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+
+phi = discretization.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol1_1.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1_1.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+
+phi = discretization.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol2.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol2.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+
+phi = discretization.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol3.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol3.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+
+phi = discretization.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol4.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol4.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+sol0_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 110, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.2],
+    phystd = [0.2], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+
+julia> sol5_new = ahmc_bayesian_pinn_pde(pde_system,
+           discretization_new;
+           draw_samples = 170, Kernel = AdvancedHMC.NUTS(0.8),
+           bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+           phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
+           priorsNNw = (0.0, 1.0),
+           saveats = [1 / 100.0, 1 / 100.0],
+           progress = true)
+
+sol1_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 110, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.2],
+    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+sol1_1_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 110, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew =  [0.5],
+    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+sol1_2_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 150, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew =  [0.1],
+    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 3.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+sol1_3_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 150, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew =  [0.2],
+    phystd = [0.3], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 3.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+sol2_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 140, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+    phystd = [0.2], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    progress = true)
+
+
+sol3_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 140, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    progress = true)
+
+sol4_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 160, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    progress = true)
+
+sol5_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 170, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    progress = true)
+
+# phi = discretization.phi[1]
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol0_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol0_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol1_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol1_1_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1_1_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol1_2_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1_2_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol1_3_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1_3_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol2_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol2_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol3_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol3_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol4_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol4_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol5_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol5_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol5_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol5_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+plotly()
+p1 = plot(ts, xs, u_predict, title = "predict")
+p2 = plot(ts, xs, u_real, title = "analytic")
+p3 = plot(ts, xs, diff_u, title = "error")
+plot(p1, p2, p3)
+# julia> sol0_new = ahmc_bayesian_pinn_pde(pde_system,
+#            discretization_new;
+#            draw_samples = 110, Kernel = AdvancedHMC.NUTS(0.8),
+#            bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.2],
+#            phystd = [0.2], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+#            priorsNNw = (0.0, 1.0),
+#            saveats = [1 / 100.0, 1 / 100.0],
+#            Dict_differentials = Dict_differentials,
+#            progress = true)
+# ┌ Info: Current Physics Log-likelihood :
+# └   ℓπ.full_loglikelihood(setparameters(ℓπ, initial_θ), ℓπ.allstd) = -398314.38213382766
+# ┌ Info: Current Prior Log-likelihood :
+# └   priorlogpdf(ℓπ, initial_θ) = -104.7365701596561
+# ┌ Info: Current MSE against dataset Log-likelihood :
+# └   L2LossData(ℓπ, initial_θ) = -58553.36940699288
+# ┌ Info: Current L2_LOSSY :
+# └   ℓπ.L2_loss2(setparameters(ℓπ, initial_θ), ℓπ.allstd) = -281.85131447737575
+# Sampling 100%|███████████████████████████████| Time: 0:26:00
+#   iterations:                                   110
+#   ratio_divergent_transitions:                  0.2
+#   ratio_divergent_transitions_during_adaption:  0.03
+#   n_steps:                                      11
+#   is_accept:                                    true
+#   acceptance_rate:                              0.0024891070448310416
+#   log_density:                                  -13158.729119075539
+#   hamiltonian_energy:                           13212.763613683248
+#   hamiltonian_energy_error:                     0.0
+#   max_hamiltonian_energy_error:                 1492.7356803165876
+#   tree_depth:                                   3
+#   numerical_error:                              true
+#   step_size:                                    0.0002145156661425442
+#   nom_step_size:                                0.0002145156661425442
+#   is_adapt:                                     false
+#   mass_matrix:                                  DiagEuclideanMetric([1.0, 1.0, 1.0, 1.0, 1.0, 1 ...])
+# [ Info: Sampling Complete.
+# ┌ Info: Current Physics Log-likelihood :
+# └   ℓπ.full_loglikelihood(setparameters(ℓπ, samples[end]), ℓπ.allstd) = -908.7769621441158
+# ┌ Info: Current Prior Log-likelihood :
+# └   priorlogpdf(ℓπ, samples[end]) = -136.87645881663929
+# ┌ Info: Current MSE against dataset Log-likelihood :
+# └   L2LossData(ℓπ, samples[end]) = -1404.7102059521355
+# ┌ Info: Current L2_LOSSY :
+# └   ℓπ.L2_loss2(setparameters(ℓπ, samples[end]), ℓπ.allstd) = -10708.363203924739
+
+# julia> sol2_new = ahmc_bayesian_pinn_pde(pde_system,
+#            discretization_new;
+#            draw_samples = 140, Kernel = AdvancedHMC.NUTS(0.8),
+#            bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+#            phystd = [0.2], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+#            priorsNNw = (0.0, 1.0),
+#            saveats = [1 / 100.0, 1 / 100.0],
+#            progress = true)
+# ┌ Info: Current Physics Log-likelihood :
+# └   ℓπ.full_loglikelihood(setparameters(ℓπ, initial_θ), ℓπ.allstd) = -397526.19267355377
+# ┌ Info: Current Prior Log-likelihood :
+# └   priorlogpdf(ℓπ, initial_θ) = -105.03439044100367
+# ┌ Info: Current MSE against dataset Log-likelihood :
+# └   L2LossData(ℓπ, initial_θ) = -60957.24454333089
+# Sampling  99%|███████████████████████████████|  ETA: 0:00:10
+#   iterations:                                   140
+#   ratio_divergent_transitions:                  0.0
+#   ratio_divergent_transitions_during_adaption:  0.01
+#   n_steps:                                      1023
+#   is_accept:                                    true
+#   acceptance_rate:                              0.972620625460237
+#   log_density:                                  -1513.1769839294327
+#   hamiltonian_energy:                           8709.204139640105
+#   hamiltonian_energy_error:                     -0.4925547801958601
+#   max_hamiltonian_energy_error:                 -1.7861646674082294
+#   tree_depth:                                   10
+#   numerical_error:                              false
+#   step_size:                                    0.00011428277138492957
+#   nom_step_size:                                0.00011428277138492957
+#   is_adapt:                                     false
+#   mass_matrix:                                  DiagEuclideanMetric([1.0, 1.0, 1.0, 1.0, 1.0, 1 ...])
+# [ Info: Sampling Complete.
+# ┌ Info: Current Physics Log-likelihood :
+# └   ℓπ.full_loglikelihood(setparameters(ℓπ, samples[end]), ℓπ.allstd) = 115.103823132341
+# ┌ Info: Current Prior Log-likelihood :
+# └   priorlogpdf(ℓπ, samples[end]) = -198.39103020815858
+# ┌ Info: Current MSE against dataset Log-likelihood :
+# └   L2LossData(ℓπ, samples[end]) = -1429.7843027541815

From 2331614f0ee40d89e2adcb86edca8fa18a73ad6f Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Thu, 9 May 2024 03:21:00 +0530
Subject: [PATCH 044/107] spelling corrections, cleared test space, seperated
 pr

---
 docs/src/examples/nonlinear_elliptic.md       |    4 +-
 docs/src/tutorials/constraints.md             |    2 +-
 .../tutorials/derivative_neural_network.md    |    4 +-
 src/collocated_estim.jl                       |   46 -
 test/BPINN_PDEinvsol_tests.jl                 | 2397 +----------------
 test/BPINN_pde_experimental.jl                | 1669 ++++++++++++
 test/bpinnexperimental.jl                     |  140 -
 7 files changed, 1675 insertions(+), 2587 deletions(-)
 delete mode 100644 src/collocated_estim.jl
 create mode 100644 test/BPINN_pde_experimental.jl
 delete mode 100644 test/bpinnexperimental.jl

diff --git a/docs/src/examples/nonlinear_elliptic.md b/docs/src/examples/nonlinear_elliptic.md
index 155330b2bc..d7f8a58579 100644
--- a/docs/src/examples/nonlinear_elliptic.md
+++ b/docs/src/examples/nonlinear_elliptic.md
@@ -89,7 +89,7 @@ sym_prob = NeuralPDE.symbolic_discretize(pdesystem, discretization)
 
 pde_inner_loss_functions = sym_prob.loss_functions.pde_loss_functions
 bcs_inner_loss_functions = sym_prob.loss_functions.bc_loss_functions[1:6]
-aprox_derivative_loss_functions = sym_prob.loss_functions.bc_loss_functions[7:end]
+approx_derivative_loss_functions = sym_prob.loss_functions.bc_loss_functions[7:end]
 
 global iteration = 0
 callback = function (p, l)
@@ -97,7 +97,7 @@ callback = function (p, l)
         println("loss: ", l)
         println("pde_losses: ", map(l_ -> l_(p.u), pde_inner_loss_functions))
         println("bcs_losses: ", map(l_ -> l_(p.u), bcs_inner_loss_functions))
-        println("der_losses: ", map(l_ -> l_(p.u), aprox_derivative_loss_functions))
+        println("der_losses: ", map(l_ -> l_(p.u), approx_derivative_loss_functions))
     end
     global iteration += 1
     return false
diff --git a/docs/src/tutorials/constraints.md b/docs/src/tutorials/constraints.md
index 0898fab116..e87e047ae3 100644
--- a/docs/src/tutorials/constraints.md
+++ b/docs/src/tutorials/constraints.md
@@ -74,7 +74,7 @@ sym_prob = NeuralPDE.symbolic_discretize(pdesystem, discretization)
 
 pde_inner_loss_functions = sym_prob.loss_functions.pde_loss_functions
 bcs_inner_loss_functions = sym_prob.loss_functions.bc_loss_functions
-aprox_derivative_loss_functions = sym_prob.loss_functions.bc_loss_functions
+approx_derivative_loss_functions = sym_prob.loss_functions.bc_loss_functions
 
 cb_ = function (p, l)
     println("loss: ", l)
diff --git a/docs/src/tutorials/derivative_neural_network.md b/docs/src/tutorials/derivative_neural_network.md
index d7ccec27ad..3963be4308 100644
--- a/docs/src/tutorials/derivative_neural_network.md
+++ b/docs/src/tutorials/derivative_neural_network.md
@@ -102,13 +102,13 @@ sym_prob = NeuralPDE.symbolic_discretize(pdesystem, discretization)
 
 pde_inner_loss_functions = sym_prob.loss_functions.pde_loss_functions
 bcs_inner_loss_functions = sym_prob.loss_functions.bc_loss_functions[1:7]
-aprox_derivative_loss_functions = sym_prob.loss_functions.bc_loss_functions[9:end]
+approx_derivative_loss_functions = sym_prob.loss_functions.bc_loss_functions[9:end]
 
 callback = function (p, l)
     println("loss: ", l)
     println("pde_losses: ", map(l_ -> l_(p.u), pde_inner_loss_functions))
     println("bcs_losses: ", map(l_ -> l_(p.u), bcs_inner_loss_functions))
-    println("der_losses: ", map(l_ -> l_(p.u), aprox_derivative_loss_functions))
+    println("der_losses: ", map(l_ -> l_(p.u), approx_derivative_loss_functions))
     return false
 end
 
diff --git a/src/collocated_estim.jl b/src/collocated_estim.jl
deleted file mode 100644
index 0fe608e951..0000000000
--- a/src/collocated_estim.jl
+++ /dev/null
@@ -1,46 +0,0 @@
-# suggested extra loss function for ODE solver case
-function L2loss2(Tar::LogTargetDensity, θ)
-    f = Tar.prob.f
-
-    # parameter estimation chosen or not
-    if Tar.extraparams > 0
-        autodiff = Tar.autodiff
-        # Timepoints to enforce Physics 
-        t = Tar.dataset[end]
-        u1 = Tar.dataset[2]
-        û = Tar.dataset[1]
-
-        nnsol = NNodederi(Tar, t, θ[1:(length(θ) - Tar.extraparams)], autodiff)
-
-        ode_params = Tar.extraparams == 1 ?
-                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
-                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
-
-        if length(Tar.prob.u0) == 1
-            physsol = [f(û[i],
-                ode_params,
-                t[i])
-                       for i in 1:length(û[:, 1])]
-        else
-            physsol = [f([û[i], u1[i]],
-                ode_params,
-                t[i])
-                       for i in 1:length(û)]
-        end
-        #form of NN output matrix output dim x n 
-        deri_physsol = reduce(hcat, physsol)
-   
-        physlogprob = 0
-        for i in 1:length(Tar.prob.u0)
-            # can add phystd[i] for u[i] 
-            physlogprob += logpdf(MvNormal(deri_physsol[i, :],
-                    LinearAlgebra.Diagonal(map(abs2,
-                        (Tar.l2std[i] * 4.0) .*
-                        ones(length(nnsol[i, :]))))),
-                nnsol[i, :])
-        end
-        return physlogprob
-    else
-        return 0
-    end
-end
\ No newline at end of file
diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index e756a3861f..5cc8fe95fe 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -243,2399 +243,4 @@ u_predict = pmean(sol2.ensemblesol[1])
 
 @test u_predict≈u_real atol=1.5
 @test mean(u_predict .- u_real) < 0.1
-@test sol1.estimated_de_params[1]≈param atol=param * 0.3
-
-
-println("Example 3: Lotka Volterra with New parameter estimation")
-@parameters t α β γ δ
-@variables x(..) y(..)
-
-Dt = Differential(t)
-eqs = [Dt(x(t)) * α  ~ x(t) - β * x(t) * y(t), Dt(y(t)) * δ ~ x(t) * y(t) - y(t)*γ ]
-bcs = [x(0) ~ 1.0, y(0) ~ 1.0]
-domains = [t ∈ Interval(0.0, 7.0)]
-
-# Define the parameters' values
-# α, β, γ, δ = p
-
-# regular equations
-# dx = (1.5 - y) * x # prey
-# dy = (x - 3.0) * y # predator
-# p = [1.5, 1.0, 3.0, 1.0] non transformed values
-
-# transformed equations
-# dx*0.666 = (1 - 0.666 * y) * x # prey
-# dy*1.0 = (x - 3.0) * y # predator
-# p = [0.666, 0.666, 3.0, 1.0] transformed values (change is scale also ensured!)
-
-chainl = [
-    Lux.Chain(Lux.Dense(1, 5, sin), Lux.Dense(5, 5, sin),Lux.Dense(5, 1)),
-    Lux.Chain(Lux.Dense(1, 5, sin), Lux.Dense(5, 5, sin),Lux.Dense(5, 1))
-]
-
-initl, st = Lux.setup(Random.default_rng(), chainl[1])
-initl1, st1 = Lux.setup(Random.default_rng(), chainl[2])
-
-using NeuralPDE, Lux, OrdinaryDiffEq, Distributions, Random
-
-function lotka_volterra(u, p, t)
-    # Model parameters. 
-    α, β, γ, δ = p
-    # Current state.
-    x, y = u
-
-    # Evaluate differential equations.
-    dx = (α - β * y) * x # prey
-    dy = (δ * x - γ) * y # predator
-
-    return [dx, dy]
-end
-# initial-value problem.
-u0 = [1.0, 1.0]
-# p = [2/3, 2/3, 1/3.0, 1/3.0]
-p = [1.5, 1.0, 3.0, 1.0]
-tspan = (0.0, 7.0)
-prob = ODEProblem(lotka_volterra, u0, tspan, p)
-dt = 0.01
-solution = solve(prob, Tsit5(); saveat = dt)
-solution1 = solve(prob, Tsit5(); saveat = 0.02)
-
-function calculate_errors(approx_sol, solution_points)
-  # Check vector lengths match
-  if length(approx_sol) != length(solution_points)
-    error("Vectors must have the same length")
-  end
-
-  # Calculate errors
-  n = length(approx_sol)
-  errors = randn(n)
-  for i in 1:n
-    errors[i] = solution_points[i] - approx_sol[i]
-  end
-
-  # Calculate RMSE
-  rmse = sqrt(mean(errors.^2))
-
-  # Calculate MAE
-  mae = mean(abs.(errors))
-
-  # Calculate maximum absolute error
-  max_error = maximum(abs.(errors))
-
-  # Return dictionary with errors
-  return Dict(
-      "RMSE" => rmse,
-      "MAE" => mae,
-      "Max Abs Error" => max_error,
-  )
-end
-u = hcat(solution1.u...)
-u[1,:]
-sol6_2.ensemblesol[1]
-
-a1=calculate_errors(pmean(sol6_1.ensemblesol[1]), u1[1,:])
-b1=calculate_errors(pmean(sol6_1.ensemblesol[2]), u1[2,:])
-
-a=calculate_errors(pmean(sol6_2.ensemblesol[1]), u[1,:])
-b=calculate_errors(pmean(sol6_2.ensemblesol[2]), u[2,:])
-
-c=calculate_errors(pmean(sol6_L2_2.ensemblesol[1]), u[1,:])
-d=calculate_errors(pmean(sol6_L2_2.ensemblesol[2]), u[2,:])
-
-e=calculate_errors(pmean(sol6_L2_1.ensemblesol[1]), u[1,:])
-f=calculate_errors(pmean(sol6_L2_1.ensemblesol[2]), u[2,:])
-
-g=calculate_errors(pmean(sol6_L2.ensemblesol[1]), u[1,:])
-h=calculate_errors(pmean(sol6_L2.ensemblesol[2]), u[2,:])
-sol6_2.ensemblesol[1]
-sol6_2.ensemblesol[2]
-
-sol6_L2.ensemblesol[1]
-sol6_L2.ensemblesol[2]
-
-# function moving_average_smoothing(data::Vector{T}, window_size::Int) where {T}
-#     smoothed_data = similar(data, T, length(data))
-
-#     for i in 1:length(data)
-#         start_idx = max(1, i - window_size)
-#         end_idx = min(length(data), i + window_size)
-#         smoothed_data[i] = mean(data[start_idx:end_idx])
-#     end
-
-#     return smoothed_data'
-# end
-
-# Extract solution
-time = solution.t
-u = hcat(solution.u...)
-time1=solution.t
-u_noisy = u .+ u .* (0.2 .* randn(size(u)))
-u_noisy0 = u .+ (3.0 .* rand(size(u)[1],size(u)[2]) .- 1.5)
-u_noisy1 = u .+ (0.8.* randn(size(Array(solution))))
-u_noisy2 = u .+ (0.5.* randn(size(Array(solution))))
-
-plot(time,u[1,:])
-plot!(time,u[2,:])
-scatter!(time1,u_noisy0[1,:])
-scatter!(time1,u_noisy0[2,:])
-scatter!(discretization_08_gaussian.dataset[1][1][:,2], discretization_08_gaussian.dataset[1][1][:,1])
-scatter!(discretization_08_gaussian.dataset[1][2][:,2], discretization_08_gaussian.dataset[1][2][:,1])
-
-scatter!(discretization_05_gaussian.dataset[1][1][:,2], discretization_05_gaussian.dataset[1][1][:,1])
-scatter!(discretization_05_gaussian.dataset[1][2][:,2], discretization_05_gaussian.dataset[1][2][:,1])
-# discretization_05_gaussian.dataset[1][1][:,2]
-# window_size = 5
-# smoothed_datasets = [moving_average_smoothing(u1[i, :], window_size)
-#                      for i in 1:length(solution.u[1])]
-# u2 = vcat(smoothed_datasets[1], smoothed_datasets[2])
-# Randomly select some points from the solution
-num_points = 100  # Number of points to select
-selected_indices = rand(1:size(u_noisy1, 2), num_points)
-upoints = [u_noisy1[:, i] for i in selected_indices]
-timepoints = [time[i] for i in selected_indices]
-temp=hcat(upoints...)
-dataset = [hcat(temp[i, :], timepoints) for i in 1:2]
-
-discretization_uniform = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
-    dataset = [dataset, nothing])
-discretization_08_gaussian = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
-    dataset = [dataset, nothing])
-discretization_05_gaussian = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
-    dataset = [dataset, nothing])
-
-discretization1 = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
-    dataset = [dataset, nothing])
-
-scatter!(discretization.dataset[1][1][:,2], discretization.dataset[1][1][:,1])
-scatter!(discretization.dataset[1][2][:,2], discretization.dataset[1][2][:,1])
-
-sol = solve(prob, Tsit5(); saveat=0.1)
-odedata = Array(sol) + 0.8 * randn(size(Array(sol)))
-
-@named pde_system = PDESystem(eqs,
-    bcs,
-    domains,
-    [t],
-    [x(t), y(t)],
-    [α, β, γ, δ],
-    defaults = Dict([α =>2, β => 2, γ =>2, δ =>2]))
-
-# creating dictionary for masking equations
-eqs = pde_system.eqs
-Dict_differentials = Dict()
-exps = toexpr.(eqs)
-nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-sol3 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_uniform = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_uniform;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_08_gaussian = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_08_gaussian;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_05_gaussian = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_05_gaussian;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-
-# more iterations for above
-sol3_100_uniform_1000 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_uniform;
-    draw_samples = 1000,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_08_gaussian_1000 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_08_gaussian;
-    draw_samples = 1000,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_05_gaussian_1000 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_05_gaussian;
-    draw_samples = 1000,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-    
-# more iterations for above + strict BC
-sol3_100_uniform_1000_bc = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_uniform;
-    draw_samples = 1000,
-    bcstd = [0.1, 0.1],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_08_gaussian_1000_bc = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_08_gaussian;
-    draw_samples = 1000,
-    bcstd = [0.1, 0.1],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_08_gaussian_1000_bc_hard = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_08_gaussian;
-    draw_samples = 1000,
-    bcstd = [0.05, 0.05],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_05_gaussian_1000_bc = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_05_gaussian;
-    draw_samples = 1000,
-    bcstd = [0.1, 0.1],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_08_gaussian_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_08_gaussian;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol3_100_05_gaussian_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_05_gaussian;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-# time
-# dataset
-# chainl[1](time', sol3.estimated_nn_params[1], st)[1][1,:]
-# plot!(time1, chainl[1](time1', sol3.estimated_nn_params[1], st)[1][1,:])
-# plot!(time1, chainl[2](time1', sol3.estimated_nn_params[2], st)[1][1,:])
-# plot!(time1, chainl[1](time1', sol5.estimated_nn_params[1], st)[1][1,:])
-# plot!(time1, chainl[2](time1', sol5.estimated_nn_params[2], st)[1][1,:])
-# time1 = collect(0.0:(1 / 100.0):8.0)
-
-sol4 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true
-)
-
-sol4_0 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true
-)
-
-
-sol5_00 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol5_0 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.2, 0.2],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol5 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-
-# 70 points in dataset
-sol6 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.2, 0.2],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-# SOL6_1 VS SOL6_L2
-sol6_1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol6_2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol6_2_L2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol6_3 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol6_4 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.2, 0.2],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol6_L2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-    
-sol6_L2_1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol6_L2_2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.05, 0.05],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-# 50 datapoint 0-5 sol5 vs sol4
-# julia> sol4.estimated_de_params
-# 4-element Vector{Particles{Float64, 234}}:
-#  0.549 ± 0.0058
-#  0.71 ± 0.0042
-#  0.408 ± 0.0063
-#  0.355 ± 0.0015
-
-# julia> sol5.estimated_de_params
-# 4-element Vector{Particles{Float64, 234}}:
-#  0.604 ± 0.0052
-#  0.702 ± 0.0034
-#  0.346 ± 0.0037
-#  0.335 ± 0.0013
-
-# 100 datapoint 0-5 sol5_2 vs sol3
-# julia> sol3.estimated_de_params
-# 4-element Vector{Particles{Float64, 234}}:
-#  0.598 ± 0.0037
-#  0.711 ± 0.0027
-#  0.399 ± 0.0032
-#  0.333 ± 0.0011
-
-# julia> sol5_2.estimated_de_params
-# 4-element Vector{Particles{Float64, 234}}:
-#  0.604 ± 0.0035
-#  0.686 ± 0.0026
-#  0.395 ± 0.0029
-#  0.328 ± 0.00095
-
-# timespan for full dataset (0-8)
-sol6 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1, 2),
-        Normal(1, 1),
-        Normal(1, 2),
-        Normal(1, 1)
-    ], progress = true)
-
-sol5_3 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1, 2),
-        Normal(1, 1),
-        Normal(1, 2),
-        Normal(1, 1)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol5_4 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.2, 0.2],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1, 2),
-        Normal(1, 1),
-        Normal(1, 2),
-        Normal(1, 1)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol5_5 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1, 2),
-        Normal(1, 1),
-        Normal(1, 2),
-        Normal(1, 1)
-    ], progress = true
-)
-
-sol7 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1, 2),
-        Normal(1, 1),
-        Normal(1, 2),
-        Normal(1, 1)
-    ], Dict_differentials = Dict_differentials, progress = true)
-
-sol5_5_1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], progress = true
-)
-
-sol7_1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true)
-    
-sol7_2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.1, 0.1],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true)
-
-sol7_3 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.2, 0.2],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true)
- 
-sol7_4 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true)
-
-lpfun = function f(chain::Chains) # function to compute the logpdf values
-    niter, nparams, nchains = size(chain)
-    lp = zeros(niter + nchains) # resulting logpdf values
-    for i = 1:nparams
-        lp += logpdf(MvNormal(Array(chain[:,i,:])) , dataset[1][:,1]')
-        lp += logpdf(MvNormal(Array(chain[:,i,:])) , dataset[1][:,2]')
-    end
-    return lp
-end
-
-DIC, pD = dic(sol3.original.mcmc_chain, lpfun)
-DIC1, pD1 = dic(sol4.original.mcmc_chain, lpfun)
-
-size(sol3.original.mcmc_chain)
-Array(sol3.original.mcmc_chain[1,:,:])
-length(sol3.estimated_nn_params[1])
-chainl[1](time', sol3.estimated_nn_params[1], st)[1]
-
-data = [hcat(calculate_derivatives2(dataset[i][:, 2], dataset[1][:, 1]),dataset[i][:, 2]) for i in eachindex(dataset)]
-dataset[1][:,1]
-dataset[2]
-plot!(dataset[1][:,2],dataset[1][:,1])
-eqs
-sol5 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 200,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.02, 0.02],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(3, 2),
-        Normal(3, 2)
-        # LogNormal(1, 2),
-        # LogNormal(1, 2),
-        # LogNormal(1, 2),
-        # LogNormal(1, 2)
-    ], progress = true)
-
-# plot(time, chainl[1](time', sol2.estimated_nn_params[1], st)[1])
-# plot!(time, chainl[2](time', sol2.estimated_nn_params[2], st)[1])
-
-sol6 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 200,
-    bcstd = [0.5, 0.5],
-    phystd = [0.5, 0.5], l2std = [0.02, 0.02],
-    priorsNNw = (0.0, 5.0), phystdnew = [0.5, 0.5],
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),aa
-    param = [
-        # LogNormal(2, 3),LogNormal(2, 3),LogNormal(2, 3),LogNormal(2, 3)
-        # Normal(3, 2),
-        # Normal(4, 2),
-        Normal(3, 2),
-        Normal(3, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-function calculate_derivatives2(indvar,depvar)
-    x̂, time = indvar,depvar
-    num_points = length(x̂)
-    # Initialize an array to store the derivative values.
-    derivatives = similar(x̂)
-
-    for i in 2:(num_points - 1)
-        # Calculate the first-order derivative using central differences.
-        Δt_forward = time[i + 1] - time[i]
-        Δt_backward = time[i] - time[i - 1]
-
-        derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
-
-        derivatives[i] = derivative
-    end
-
-    # Derivatives at the endpoints can be calculated using forward or backward differences.
-    derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
-    derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
-    return derivatives
-end
-dataset[1]
-dataset[2]
-dataset[1][:,1]=calculate_derivatives2(dataset[1][:,2], dataset[1][:,1])
-dataset[2][:,1]=calculate_derivatives2(dataset[2][:,2], dataset[2][:,1])
-dataset[1]
-dataset[2]
-sol7 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 200,
-    bcstd = [0.5, 0.5],
-    phystd = [0.5, 0.5], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(0, 2),
-        Normal(0, 2)
-        # LogNormal(1, 2),
-        # LogNormal(1, 2),
-        # LogNormal(1, 2),
-        # LogNormal(1, 2)
-    ], progress = true)
-
-# plot(time, chainl[1](time', sol2.estimated_nn_params[1], st)[1])
-# plot!(time, chainl[2](time', sol2.estimated_nn_params[2], st)[1])
-
-sol8 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), phystdnew = [0.1, 0.1],
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),aa
-    param = [
-        # LogNormal(2, 3),LogNormal(2, 3),LogNormal(2, 3),LogNormal(2, 3)
-        # Normal(3, 2),
-        # Normal(4, 2),
-        Normal(0, 2),
-        Normal(0, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-timepoints = collect(0.0:(1 / 100.0):9.0)
-plot!(timepoints', chainl[1](timepoints', sol5_4.estimated_nn_params[1], st)[1])
-plot!(timepoints, chainl[2](timepoints', sol5_4.estimated_nn_params[2], st)[1])
-
-sol_L2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true
-)
-
-sol_NEW = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-sol_L2_70 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true
-)
-
-sol_NEW_70 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-using Plots, StatsPlots
-plotly()
-
-plot(time, u[1, :])
-plot!(time, u[2, :])
-scatter!(time, u_noisy[1, :])
-scatter!(time, u_noisy[2, :])
-scatter!(discretization.dataset[1][1][:,2], discretization.dataset[1][1][:,1])
-scatter!(discretization.dataset[1][2][:,2], discretization.dataset[1][2][:,1])
-
-scatter!(discretization1.dataset[1][1][:,2], discretization1.dataset[1][1][:,1],legend=nothing)
-scatter!(discretization1.dataset[1][2][:,2], discretization1.dataset[1][2][:,1])
-
-# plot28(sol4 seems better vs sol3 plots, params seems similar)
-plot!(sol3.timepoints[1]', sol3.ensemblesol[1])
-plot!(sol3.timepoints[2]', sol3.ensemblesol[2])
-plot!(sol3_0.timepoints[1]', sol3_0.ensemblesol[1])
-plot!(sol3_0.timepoints[2]', sol3_0.ensemblesol[2])
-
-plot!(sol4.timepoints[1]', sol4.ensemblesol[1])
-plot!(sol4.timepoints[2]', sol4.ensemblesol[2])
-plot!(sol4_0.timepoints[1]', sol4_0.ensemblesol[1])
-plot!(sol4_0.timepoints[2]', sol4_0.ensemblesol[2])
-
-plot!(sol4_2.timepoints[1]', sol4_2.ensemblesol[1],legend=nothing)
-plot!(sol4_2.timepoints[2]', sol4_2.ensemblesol[2])
-plot!(sol5_2.timepoints[1]', sol5_2.ensemblesol[1],legend=nothing)
-plot!(sol5_2.timepoints[2]', sol5_2.ensemblesol[2])
-
-plot!(sol4_3.timepoints[1]', sol4_3.ensemblesol[1],legend=nothing)
-plot!(sol4_3.timepoints[2]', sol4_3.ensemblesol[2])
-plot!(sol5_3.timepoints[1]', sol5_3.ensemblesol[1])
-plot!(sol5_3.timepoints[2]', sol5_3.ensemblesol[2])
-plot!(sol5_4.timepoints[1]', sol5_4.ensemblesol[1],legend=nothing)
-plot!(sol5_4.timepoints[2]', sol5_4.ensemblesol[2])
-
-
-# plot 36 sol4 vs sol5(params sol4 better, but plots sol5 "looks" better),plot 44(sol5 better than sol6 overall)
-plot!(sol5.timepoints[1]', sol5.ensemblesol[1],legend=nothing)
-plot!(sol5.timepoints[2]', sol5.ensemblesol[2])
-
-plot!(sol5_0.timepoints[1]', sol5_0.ensemblesol[1],legend=nothing)
-plot!(sol5_0.timepoints[2]', sol5_0.ensemblesol[2])
-
-plot!(sol5_00.timepoints[1]', sol5_00.ensemblesol[1],legend=nothing)
-plot!(sol5_00.timepoints[2]', sol5_00.ensemblesol[2])
-
-plot!(sol6.timepoints[1]', sol6.ensemblesol[1])
-plot!(sol6.timepoints[2]', sol6.ensemblesol[2])
-plot!(sol6_L2.timepoints[1]', sol6_L2.ensemblesol[1])
-plot!(sol6_L2.timepoints[2]', sol6_L2.ensemblesol[2])
-
-plot!(sol6_L2_1.timepoints[1]', sol6_L2_1.ensemblesol[1])
-plot!(sol6_L2_1.timepoints[2]', sol6_L2_1.ensemblesol[2])
-
-plot!(sol6_L2_2.timepoints[1]', sol6_L2_2.ensemblesol[1])
-plot!(sol6_L2_2.timepoints[2]', sol6_L2_2.ensemblesol[2])
-
-plot!(sol6_1.timepoints[1]', sol6_1.ensemblesol[1])
-plot!(sol6_1.timepoints[2]', sol6_1.ensemblesol[2])
-plot!(sol6_2.timepoints[1]', sol6_2.ensemblesol[1])
-plot!(sol6_2.timepoints[2]', sol6_2.ensemblesol[2],legend=nothing)
-plot!(sol6_2_L2.timepoints[1]', sol6_2_L2.ensemblesol[1])
-plot!(sol6_2_L2.timepoints[2]', sol6_2_L2.ensemblesol[2],legend=nothing)
-
-# plot52 sol7 vs sol5(sol5 overall better plots, params?)
-plot!(sol7.timepoints[1]', sol7.ensemblesol[1])
-plot!(sol7.timepoints[2]', sol7.ensemblesol[2])
-
-# sol8,sol8_2,sol9,sol9_2 bad
-plot!(sol8.timepoints[1]', sol8.ensemblesol[1])
-plot!(sol8.timepoints[2]', sol8.ensemblesol[2])
-plot!(sol8_2.timepoints[1]', sol8_2.ensemblesol[1])
-plot!(sol8_2.timepoints[2]', sol8_2.ensemblesol[2])
-
-plot!(sol9.timepoints[1]', sol9.ensemblesol[1])
-plot!(sol9.timepoints[2]', sol9.ensemblesol[2])
-plot!(sol9_2.timepoints[1]', sol9_2.ensemblesol[1])
-plot!(sol9_2.timepoints[2]', sol9_2.ensemblesol[2])
-
-
-plot!(sol5_5.timepoints[1]', sol5_5.ensemblesol[1])
-plot!(sol5_5.timepoints[2]', sol5_5.ensemblesol[2],legend=nothing)
-
-plot!(sol5_5_1.timepoints[1]', sol5_5_1.ensemblesol[1])
-plot!(sol5_5_1.timepoints[2]', sol5_5_1.ensemblesol[2],legend=nothing)
-plot!(sol7_1.timepoints[1]', sol7_1.ensemblesol[1])
-plot!(sol7_1.timepoints[2]', sol7_1.ensemblesol[2])
-
-plot!(sol7_4.timepoints[1]', sol7_4.ensemblesol[1])
-plot!(sol7_4.timepoints[2]', sol7_4.ensemblesol[2])
-
-plot!(sol5_2_1.timepoints[1]', sol5_2_1.ensemblesol[1],legend=nothing)
-plot!(sol5_2_1.timepoints[2]', sol5_2_1.ensemblesol[2])
-plot!(sol5_2_2.timepoints[1]', sol5_2_2.ensemblesol[1],legend=nothing)
-plot!(sol5_2_2.timepoints[2]', sol5_2_2.ensemblesol[2])
-
-plot!(sol5_0.timepoints[1]', sol5_0.ensemblesol[1])
-plot!(sol5_0.timepoints[2]', sol5_0.ensemblesol[2],legend=nothing)
-
-plot!(sol5_00.timepoints[1]', sol5_00.ensemblesol[1],legend=nothing)
-plot!(sol5_00.timepoints[2]', sol5_00.ensemblesol[2])
-
-plot!(sol3_0.timepoints[1]', sol3_0.ensemblesol[1])
-plot!(sol3_0.timepoints[2]', sol3_0.ensemblesol[2],legend=nothing)
-plot!(sol4_0.timepoints[1]', sol4_0.ensemblesol[1])
-plot!(sol4_0.timepoints[2]', sol4_0.ensemblesol[2],legend=nothing)
-
-plot!(sol3_100_05_gaussian.timepoints[1]', sol3_100_05_gaussian.ensemblesol[1])
-plot!(sol3_100_05_gaussian.timepoints[2]', sol3_100_05_gaussian.ensemblesol[2],legend=nothing)
-
-plot!(sol3_100_05_gaussian_new.timepoints[1]', sol3_100_05_gaussian_new.ensemblesol[1])
-plot!(sol3_100_05_gaussian_new.timepoints[2]', sol3_100_05_gaussian_new.ensemblesol[2])
-
-plot!(sol3_100_08_gaussian.timepoints[1]', sol3_100_08_gaussian.ensemblesol[1])
-plot!(sol3_100_08_gaussian.timepoints[2]', sol3_100_08_gaussian.ensemblesol[2])
-
-plot!(sol3_100_08_gaussian_new.timepoints[1]', sol3_100_08_gaussian_new.ensemblesol[1])
-plot!(sol3_100_08_gaussian_new.timepoints[2]', sol3_100_08_gaussian_new.ensemblesol[2],legend=nothing)
-
-plot!(sol3_100_uniform.timepoints[1]', sol3_100_uniform.ensemblesol[1])
-plot!(sol3_100_uniform.timepoints[2]', sol3_100_uniform.ensemblesol[2])
-
-plot!(sol3_100_08_gaussian_1000.timepoints[1]', sol3_100_08_gaussian_1000.ensemblesol[1])
-plot!(sol3_100_08_gaussian_1000.timepoints[2]', sol3_100_08_gaussian_1000.ensemblesol[2])
-
-plot!(sol3_100_05_gaussian_1000.timepoints[1]', sol3_100_05_gaussian_1000.ensemblesol[1])
-plot!(sol3_100_05_gaussian_1000.timepoints[2]', sol3_100_05_gaussian_1000.ensemblesol[2])
-
-plot!(sol3_100_uniform_1000.timepoints[1]', sol3_100_uniform_1000.ensemblesol[1])
-plot!(sol3_100_uniform_1000.timepoints[2]', sol3_100_uniform_1000.ensemblesol[2])
-
-plot!(sol3_100_08_gaussian_1000_bc.timepoints[1]', sol3_100_08_gaussian_1000_bc.ensemblesol[1])
-plot!(sol3_100_08_gaussian_1000_bc.timepoints[2]', sol3_100_08_gaussian_1000_bc.ensemblesol[2])
-
-# test with lower number of points
-# test same calls 2 times or more
-# consider full range dataset case
-# combination of all above
-
-# run 1 100 iters
-sol5.estimated_de_params
-sol6.estimated_de_params
-
-# run 2 200 iters
-sol5.estimated_de_params
-sol6.estimated_de_params
-
-# run 2 200 iters
-sol3.estimated_de_params
-sol4.estimated_de_params
-
-# p = [2/3, 2/3, 1/3, 1/3]
-sol3.estimated_de_params
-sol4.estimated_de_params
-dataset[1]
-eqs
-α, β, γ, δ = p
-p
-#  1.0
-#  0.6666666666666666
-#  1.0
-#  0.33333333333333333
-
-1/a
-1/c
-eqs
-using StatsPlots
-plotly()
-plot(sol3.original.mcmc_chain)
-plot(sol5_00.original.mcmc_chain)
-
-# 4-element Vector{Particles{Float64, 34}}:
-#  1.23 ± 0.022
-#  0.858 ± 0.011
-#  3.04 ± 0.079
-#  1.03 ± 0.024
-# 4-element Vector{Particles{Float64, 34}}:
-#  1.2 ± 0.0069
-#  0.835 ± 0.006
-#  3.22 ± 0.01
-#  1.08 ± 0.0053
-# # plot(time', chainl[1](time', sol1.estimated_nn_params[1], st)[1])
-# # plot!(time, chainl[2](time', sol1.estimated_nn_params[2], st)[1])
-
-# sol3 = ahmc_bayesian_pinn_pde(pde_system,
-#     discretization;
-#     draw_samples = 500,
-#     bcstd = [0.05, 0.05],
-#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
-#     phystdnew = [0.5, 0.5],
-#     #  Kernel = AdvancedHMC.NUTS(0.8),
-#     priorsNNw = (0.0, 10.0),
-#     saveats = [1 / 50.0],
-#     param = [
-#         Normal(0.0, 2),
-#         Normal(0.0, 2),
-#         Normal(0.0, 2),
-#         Normal(0.0, 2)
-#     ],
-#     Dict_differentials = Dict_differentials, progress = true)
-
-# sol = ahmc_bayesian_pinn_pde(pde_system,
-#     discretization;
-#     draw_samples = 500,
-#     bcstd = [0.05, 0.05],
-#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
-#     priorsNNw = (0.0, 10.0),
-#     saveats = [1 / 50.0],
-#     # Kernel = AdvancedHMC.NUTS(0.8),
-#     param = [
-#         Normal(1.0, 2),
-#         Normal(1.0, 2),
-#         Normal(1.0, 2),
-#         Normal(1.0, 2)
-#     ], progress = true)
-
-# plot!(sol.timepoints[1]', sol.ensemblesol[1])
-# plot!(sol.timepoints[2]', sol.ensemblesol[2])
-
-# sol1 = ahmc_bayesian_pinn_pde(pde_system,
-#     discretization;
-#     draw_samples = 500,
-#     bcstd = [0.05, 0.05],
-#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
-#     phystdnew = [0.5, 0.5],
-#     #  Kernel = AdvancedHMC.NUTS(0.8),
-#     priorsNNw = (0.0, 10.0),
-#     saveats = [1 / 50.0],
-#     param = [
-#         Normal(1.0, 2),
-#         Normal(1.0, 2),
-#         Normal(1.0, 2),
-#         Normal(1.0, 2)
-#     ],
-#     Dict_differentials = Dict_differentials, progress = true)
-
-# plot!(sol1.timepoints[1]', sol1.ensemblesol[1])
-# plot!(sol1.timepoints[2]', sol1.ensemblesol[2])
-
-sol = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 500,
-    bcstd = [0.05, 0.05],
-    phystd = [0.005, 0.005], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 10.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(1.0, 2),
-        Normal(1.0, 2),
-        Normal(1.0, 2),
-        Normal(1.0, 2)
-    ])
-
-# plot!(sol.timepoints[1]', sol.ensemblesol[1])
-# plot!(sol.timepoints[2]', sol.ensemblesol[2])
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 500,
-    bcstd = [0.05, 0.05],
-    phystd = [0.005, 0.005], l2std = [0.1, 0.1],
-    phystdnew = [0.5, 0.5],
-    #  Kernel = AdvancedHMC.NUTS(0.8),
-    priorsNNw = (0.0, 10.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1.0, 2),
-        Normal(1.0, 2),
-        Normal(1.0, 2),
-        Normal(1.0, 2)
-    ],
-    Dict_differentials = Dict_differentials)
-
-param = 2 * π
-ts = vec(sol1.timepoints[1])
-u_real = [analytic_sol_func1(0.0, t) for t in ts]
-u_predict = pmean(sol1.ensemblesol[1])
-
-@test u_predict≈u_real atol=1.5
-@test mean(u_predict .- u_real) < 0.1
-@test sol1.estimated_de_params[1]≈param atol=param * 0.3
-
-# points1 = []
-# for eq_arg in eq_args
-#     a = []
-#     # for each (depvar,[indvar1..]) if indvari==indvar (eq_arg)
-#     for i in eachindex(symbols_input)
-#         if symbols_input[i][2] == eq_arg
-#             # include domain points of that depvar
-#             # each loss equation take domain matrix [points..;points..]
-#             push!(a, train_sets[i][:, 2:end]')
-#         end
-#     end
-#     # vcat as new row for next equation
-#     push!(points1, vcat(a...))
-# end
-# println(points1 == points)
-
-# using NeuralPDE, Flux, Lux, ModelingToolkit, LinearAlgebra, AdvancedHMC
-# import ModelingToolkit: Interval, infimum, supremum, Distributions
-# using Plots, MonteCarloMeasurements
-
-# @parameters x, t, α
-# @variables u(..)
-# Dt = Differential(t)
-# Dx = Differential(x)
-# Dx2 = Differential(x)^2
-# Dx3 = Differential(x)^3
-# Dx4 = Differential(x)^4
-
-# # α = 1
-# β = 4
-# γ = 1
-# eq = Dt(u(x, t)) + u(x, t) * Dx(u(x, t)) + α * Dx2(u(x, t)) + β * Dx3(u(x, t)) + γ * Dx4(u(x, t)) ~ 0
-
-# u_analytic(x, t; z = -x / 2 + t) = 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
-# du(x, t; z = -x / 2 + t) = 15 / 2 * (tanh(z) + 1) * (3 * tanh(z) - 1) * sech(z)^2
-
-# bcs = [u(x, 0) ~ u_analytic(x, 0),
-#     u(-10, t) ~ u_analytic(-10, t),
-#     u(10, t) ~ u_analytic(10, t),
-#     Dx(u(-10, t)) ~ du(-10, t),
-#     Dx(u(10, t)) ~ du(10, t)]
-
-# # Space and time domains
-# domains = [x ∈ Interval(-10.0, 10.0),
-#     t ∈ Interval(0.0, 1.0)]
-
-# # Discretization
-# dx = 0.4;
-# dt = 0.2;
-
-# # Function to compute analytical solution at a specific point (x, t)
-# function u_analytic_point(x, t)
-#     z = -x / 2 + t
-#     return 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
-# end
-
-# # Function to generate the dataset matrix
-# function generate_dataset_matrix(domains, dx, dt)
-#     x_values = -10:dx:10
-#     t_values = 0.0:dt:1.0
-
-#     dataset = []
-
-#     for t in t_values
-#         for x in x_values
-#             u_value = u_analytic_point(x, t)
-#             push!(dataset, [u_value, x, t])
-#         end
-#     end
-
-#     return vcat([data' for data in dataset]...)
-# end
-
-# datasetpde = [generate_dataset_matrix(domains, dx, dt)]
-
-# # noise to dataset
-# noisydataset = deepcopy(datasetpde)
-# noisydataset[1][:, 1] = noisydataset[1][:, 1] .+
-#                         randn(size(noisydataset[1][:, 1])) .* 5 / 100 .*
-#                         noisydataset[1][:, 1]
-
-# # plot(datasetpde[1][:, 2], datasetpde[1][:, 1], title = "Dataset from Analytical Solution")
-# # plot!(noisydataset[1][:, 2], noisydataset[1][:, 1])
-
-# # Neural network
-# chain = Lux.Chain(Lux.Dense(2, 8, Lux.tanh),
-#     Lux.Dense(8, 8, Lux.tanh),
-#     Lux.Dense(8, 1))
-
-# discretization = NeuralPDE.BayesianPINN([chain],
-#     GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset, nothing])
-
-# @named pde_system = PDESystem(eq,
-#     bcs,
-#     domains,
-#     [x, t],
-#     [u(x, t)],
-#     [α],
-#     defaults = Dict([α => 0.5]))
-
-# sol1 = ahmc_bayesian_pinn_pde(pde_system,
-#     discretization;
-#     draw_samples = 100,
-#     bcstd = [0.2, 0.2, 0.2, 0.2, 0.2],
-#     phystd = [1.0], l2std = [0.05], param = [Distributions.LogNormal(0.5, 2)],
-#     priorsNNw = (0.0, 10.0),
-#     saveats = [1 / 100.0, 1 / 100.0], progress = true)
-
-# eqs = pde_system.eqs
-# Dict_differentials = Dict()
-# exps = toexpr.(eqs)
-# nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-# sol2 = ahmc_bayesian_pinn_pde(pde_system,
-#     discretization;
-#     draw_samples = 100,
-#     bcstd = [0.2, 0.2, 0.2, 0.2, 0.2],
-#     phystd = [1.0], phystdnew = [0.05], l2std = [0.05],
-#     param = [Distributions.LogNormal(0.5, 2)],
-#     priorsNNw = (0.0, 10.0),
-#     saveats = [1 / 100.0, 1 / 100.0], Dict_differentials = Dict_differentials,
-#     progress = true)
-
-# phi = discretization.phi[1]
-# xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-#           for (d, dx) in zip(domains, [dx / 10, dt])]
-# u_predict = [[first(pmean(phi([x, t], sol1.estimated_nn_params[1]))) for x in xs]
-#              for t in ts]
-# u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-# diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1.estimated_nn_params[1]))))
-#            for x in xs]
-#           for t in ts]
-
-# # p1 = plot(xs, u_predict, title = "predict")
-# # p2 = plot(xs, u_real, title = "analytic")
-# # p3 = plot(xs, diff_u, title = "error")
-# # plot(p1, p2, p3)
-
-# phi = discretization.phi[1]
-# xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-#           for (d, dx) in zip(domains, [dx / 10, dt])]
-# u_predict = [[first(pmean(phi([x, t], sol2.estimated_nn_params[1]))) for x in xs]
-#              for t in ts]
-# u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-# diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol2.estimated_nn_params[1]))))
-#            for x in xs]
-#           for t in ts]
-
-# # p1 = plot(xs, u_predict, title = "predict")
-# # p2 = plot(xs, u_real, title = "analytic")
-# # p3 = plot(xs, diff_u, title = "error")
-# # plot(p1, p2, p3)
-
-@parameters t, p
-@variables u(..)
-
-Dt = Differential(t)
-eqs = Dt(u(t)) - cos(p * t) ~ 0
-bcs = [u(0) ~ 0.0]
-domains = [t ∈ Interval(0.0, 2.0)]
-
-chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
-initl, st = Lux.setup(Random.default_rng(), chainl)
-
-@named pde_system = PDESystem(eqs,
-    bcs,
-    domains,
-    [t],
-    [u(t)],
-    [p],
-    defaults = Dict([p => 4.0]))
-
-analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
-timepoints = collect(0.0:(1 / 100.0):2.0)
-u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
-u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
-dataset = [hcat(u1, timepoints)]
-
-discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
-    dataset = [dataset, nothing])
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 1500,
-    bcstd = [0.05],
-    phystd = [0.01], l2std = [0.01],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 50.0],
-    param = [LogNormal(4.0, 2)], progress = true)
-
-param = 2 * π
-ts = vec(sol1.timepoints[1])
-u_real = [analytic_sol_func1(0.0, t) for t in ts]
-u_predict = pmean(sol1.ensemblesol[1])
-
-@test u_predict≈u_real atol=0.1
-@test mean(u_predict .- u_real) < 0.01
-@test sol1.estimated_de_params[1]≈param atol=0.1
-sol1.estimated_de_params[1]
-
-eqs = pde_system.eqs
-Dict_differentials = Dict()
-exps = toexpr.(eqs)
-nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-sol2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 1500,
-    bcstd = [0.05],
-    phystd = [0.01], l2std = [0.02], phystdnew = [0.02],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 50.0],
-    param = [LogNormal(4.0, 2)],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-param = 2 * π
-ts_2 = vec(sol2.timepoints[1])
-u_real_2 = [analytic_sol_func1(0.0, t) for t in ts]
-u_predict_2 = pmean(sol2.ensemblesol[1])
-
-@test u_predict_2≈u_real_2 atol=0.1
-@test mean(u_predict_2 .- u_real_2) < 0.01
-@test sol2.estimated_de_params[1]≈param atol=0.1
-sol2.estimated_de_params[1]
-
-plot(ts_2, u_predict_2)
-plot!(ts_2, u_real_2)
-
-@parameters t, σ_
-@variables x(..), y(..), z(..)
-Dt = Differential(t)
-eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
-    Dt(y(t)) ~ x(t) * (28.0 - z(t)) - y(t),
-    Dt(z(t)) ~ x(t) * y(t) - 8 / 3 * z(t)]
-
-bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
-domains = [t ∈ Interval(0.0, 1.0)]
-
-input_ = length(domains)
-n = 7
-chain = [
-    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-        Lux.Dense(n, 1)),
-    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-        Lux.Dense(n, 1)),
-    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-        Lux.Dense(n, 1))
-]
-
-#Generate Data
-function lorenz!(du, u, p, t)
-    du[1] = 10.0 * (u[2] - u[1])
-    du[2] = u[1] * (28.0 - u[3]) - u[2]
-    du[3] = u[1] * u[2] - (8 / 3) * u[3]
-end
-
-u0 = [1.0; 0.0; 0.0]
-tspan = (0.0, 1.0)
-prob = ODEProblem(lorenz!, u0, tspan)
-sol = solve(prob, Tsit5(), dt = 0.01, saveat = 0.05)
-ts = sol.t
-us = hcat(sol.u...)
-us = us .+ ((0.05 .* randn(size(us))) .* us)
-ts_ = hcat(sol(ts).t...)[1, :]
-dataset = [hcat(us[i, :], ts_) for i in 1:3]
-
-discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true,
-    dataset = [dataset, nothing])
-
-@named pde_system = PDESystem(eqs, bcs, domains,
-    [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]]))
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 100,
-    bcstd = [0.3, 0.3, 0.3],
-    phystd = [0.1, 0.1, 0.1],
-    l2std = [1, 1, 1],
-    priorsNNw = (0.0, 1.0),
-    saveats = [0.01],
-    param = [Normal(14.0, 2)], progress = true)
-
-idealp = 10.0
-p_ = sol1.estimated_de_params[1]
-@test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
-# @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
-
-@parameters x y
-@variables u(..)
-Dxx = Differential(x)^2
-Dyy = Differential(y)^2
-
-# 2D PDE
-eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y)
-
-# Boundary conditions
-bcs = [u(0, y) ~ 0.0, u(1, y) ~ 0.0,
-    u(x, 0) ~ 0.0, u(x, 1) ~ 0.0]
-
-# Space and time domains
-domains = [x ∈ Interval(0.0, 1.0),
-    y ∈ Interval(0.0, 1.0)]
-
-# Neural network
-dim = 2 # number of dimensions
-chain = Lux.Chain(Lux.Dense(dim, 9, Lux.σ), Lux.Dense(9, 9, Lux.σ), Lux.Dense(9, 1))
-
-# Discretization
-dx = 0.04
-discretization = BayesianPINN([chain], GridTraining(dx), dataset = [[dataset], nothing])
-
-@named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
-
-eqs = pde_system.eqs
-Dict_differentials = Dict()
-exps = toexpr.(eqs)
-nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 5,
-    bcstd = [0.01, 0.01, 0.01, 0.01],
-    phystd = [0.005],
-    priorsNNw = (0.0, 2.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-xs = sol1.timepoints[1]
-sol1.ensemblesol[1]
-analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2)
-
-dataset = hcat(u_real, xs')
-u_predict = pmean(sol1.ensemblesol[1])
-u_real = [analytic_sol_func(xs[:, i][1], xs[:, i][2]) for i in 1:length(xs[1, :])]
-@test u_predict≈u_real atol=0.8
-
-using NeuralPDE, Lux, ModelingToolkit, LinearAlgebra, AdvancedHMC
-import ModelingToolkit: Interval, infimum, supremum, Distributions
-using Plots, MonteCarloMeasurements
-
-@parameters x t
-@variables u(..)
-
-Dt = Differential(t)
-Dx = Differential(x)
-Dxx = Dx^2
-α = 0.05
-# Burger's equation
-eq = Dt(u(t, x)) + u(t, x) * Dx(u(t, x)) - α * Dxx(u(t, x)) ~ 0
-
-# boundary conditions
-bcs = [
-    u(0.0, x) ~ -sin(π * x),
-    u(t, -1.0) ~ 0.0,
-    u(t, 1.0) ~ 0.0
-]
-
-domains = [t ∈ Interval(0.0, 1.0), x ∈ Interval(-1.0, 1.0)]
-
-# Neural network
-chain = Lux.Chain(Dense(2, 10, Lux.σ), Dense(10, 10, Lux.σ), Dense(10, 1))
-strategy = NeuralPDE.QuadratureTraining(; abstol = 1e-6, reltol = 1e-6, batch = 200)
-
-indvars = [t, x]
-depvars = [u(t, x)]
-@named pde_system = PDESystem(eq, bcs, domains, indvars, depvars)
-
-# KS EQUATION
-using NeuralPDE, Flux, Lux, ModelingToolkit, LinearAlgebra, AdvancedHMC
-import ModelingToolkit: Interval, infimum, supremum, Distributions
-using Plots, MonteCarloMeasurements, StatsPlots
-
-@parameters x, t, α
-@variables u(..)
-Dt = Differential(t)
-Dx = Differential(x)
-Dx2 = Differential(x)^2
-Dx3 = Differential(x)^3
-Dx4 = Differential(x)^4
-
-# α = 1
-β = 4
-γ = 1
-eq = Dt(u(x, t)) + u(x, t) * Dx(u(x, t)) + α * Dx2(u(x, t)) + β * Dx3(u(x, t)) + γ * Dx4(u(x, t)) ~ 0
-
-u_analytic(x, t; z = -x / 2 + t) = 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
-du(x, t; z = -x / 2 + t) = 15 / 2 * (tanh(z) + 1) * (3 * tanh(z) - 1) * sech(z)^2
-
-bcs = [u(x, 0) ~ u_analytic(x, 0),
-    u(-10, t) ~ u_analytic(-10, t),
-    u(10, t) ~ u_analytic(10, t),
-    Dx(u(-10, t)) ~ du(-10, t),
-    Dx(u(10, t)) ~ du(10, t)]
-
-# Space and time domains
-domains = [x ∈ Interval(-10.0, 10.0),
-    t ∈ Interval(0.0, 1.0)]
-
-# Discretization
-dx = 0.4;
-dt = 0.2;
-
-# Function to compute analytical solution at a specific point (x, t)
-function u_analytic_point(x, t)
-    z = -x / 2 + t
-    return 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
-end
-
-# Function to generate the dataset matrix
-function generate_dataset_matrix(domains, dx, dt, xlim, tlim)
-    x_values = xlim[1]:dx:xlim[2]
-    t_values = tlim[1]:dt:tlim[2]
-
-    dataset = []
-
-    for t in t_values
-        for x in x_values
-            u_value = u_analytic_point(x, t)
-            push!(dataset, [u_value, x, t])
-        end
-    end
-
-    return vcat([data' for data in dataset]...)
-end
-
-#   x_values = -10:dx:10
-#     t_values = 0.0:dt:1.0
-
-#     dataset = []
-
-#     for t in t_values
-#         for x in x_values
-#             u_value = u_analytic_point(x, t)
-#             push!(dataset, [u_value, x, t])
-#         end
-#     end
-# dataset
-#    pop= vcat([data' for data in dataset]...)
-
-datasetpde = [generate_dataset_matrix(domains, dx, dt, [-10,10], [0.0,1.0])]
-
-datasetpde_new = [generate_dataset_matrix(domains, dx, dt, [-10,0], [0.0,1.0])]
-
-# noise to dataset
-noisydataset = deepcopy(datasetpde)
-noisydataset[1][:, 1] = noisydataset[1][:, 1] .+ (randn(size(noisydataset[1][:, 1])) .* 0.8)
-
-noisydataset_new = deepcopy(datasetpde_new)
-noisydataset_new[1][:, 1] = noisydataset_new[1][:, 1] .+ (randn(size(noisydataset_new[1][:, 1])) .* 0.8)
-
-# a=discretization_new.dataset[1]
-
-plotly()
-plot(datasetpde[1][:, 2], datasetpde[1][:, 1], title = "Dataset from Analytical Solution")
-# scatter!(a[1][:, 2], a[1][:, 1])
-scatter!(noisydataset[1][:, 2], noisydataset[1][:, 1])
-
-plot(datasetpde[1][:, 2],datasetpde[1][:, 3], datasetpde[1][:, 1], title = "Dataset from Analytical Solution")
-scatter!(noisydataset[1][:, 2],noisydataset[1][:, 3], noisydataset[1][:, 1])
-
-plotly()
-plot(datasetpde_new[1][:, 2], datasetpde_new[1][:, 1], title = "Dataset from Analytical Solution")
-scatter!(noisydataset_new[1][:, 2], noisydataset_new[1][:, 1])
-
-plot(datasetpde_new[1][:, 2],datasetpde_new[1][:, 3], datasetpde_new[1][:, 1], title = "Dataset from Analytical Solution")
-scatter!(noisydataset_new[1][:, 2],noisydataset_new[1][:, 3], noisydataset_new[1][:, 1])
-
-noise_std = 1.4
-original_data = datasetpde[1][:, 1]
-original_std = std(original_data)
-ratio = noise_std / original_std
-
-
-using StatsPlots
-plot(sol1.original.mcmc_chain)
-plot(sol2.original.mcmc_chain)
-
-plot(sol0_new.original.mcmc_chain)
-plot(sol2_new.original.mcmc_chain)
-
-# Neural network
-chain = Lux.Chain(Lux.Dense(2, 8, Lux.tanh),
-    Lux.Dense(8, 8, Lux.tanh),
-    Lux.Dense(8, 1))
-
-chain_more = Lux.Chain(Lux.Dense(2, 10, Lux.tanh),
-    Lux.Dense(10, 10, Lux.tanh),
-    Lux.Dense(10, 1))
-# chain = Lux.Chain(Lux.Dense(2, 8, Lux.σ),
-#     Lux.Dense(8, 8, Lux.σ),
-#     Lux.Dense(8, 1))
-
-discretization = NeuralPDE.BayesianPINN([chain],
-    GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset, nothing])
-
-discretization_more = NeuralPDE.BayesianPINN([chain_more],
-    GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset, nothing])
-
-discretization_new = NeuralPDE.BayesianPINN([chain],
-    GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset_new, nothing])
-
-
-@named pde_system = PDESystem(eq,
-    bcs,
-    domains,
-    [x, t],
-    [u(x, t)],
-    [α],
-    defaults = Dict([α => 2.0]))
-
-eqs = pde_system.eqs
-Dict_differentials = Dict()
-exps = toexpr.(eqs)
-nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-Dict_differentials
-
-plot(sol1.original.mcmc_chain)
-meanplot(sol1.original.mcmc_chain)
-autocorplot(sol1.original.mcmc_chain)
-traceplot(sol1.original.mcmc_chain)
-
-plot(sol2.original.mcmc_chain)
-meanplot(sol2.original.mcmc_chain)
-autocorplot(sol2.original.mcmc_chain)
-traceplot(sol2.original.mcmc_chain)
-
-plot(sol0_new.original.mcmc_chain)
-meanplot(sol0_new.original.mcmc_chain)
-autocorplot(sol0_new.original.mcmc_chain)
-
-plot(sol2_new.original.mcmc_chain)
-meanplot(sol2_new.original.mcmc_chain)
-autocorplot(sol2_new.original.mcmc_chain)
-
-plot(sol3_new.original.mcmc_chain)
-meanplot(sol3_new.original.mcmc_chain)
-autocorplot(sol3_new.original.mcmc_chain)
-
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 80, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.5],
-    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-sol1_more = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_more;
-    draw_samples = 80, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.5],
-    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-sol1_1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 90, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew =  [0.7],
-    phystd = [0.7], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-sol2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 80, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    progress = true)
-
-sol2_more = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_more;
-    draw_samples = 80, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    progress = true)
-
-# julia> sol2 = ahmc_bayesian_pinn_pde(pde_system,
-#            discretization;
-#            draw_samples = 85, Kernel = AdvancedHMC.NUTS(0.8),
-#            bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-#            phystd = [0.7], l2std = [0.2], param = [Distributions.Normal(2.0,
-#            priorsNNw = (0.0, 3.0),
-#            saveats = [1 / 100.0, 1 / 100.0],
-#            progress = true)
-# ┌ Info: Current Physics Log-likelihood : 
-# └   ℓπ.full_loglikelihood(setparameters(ℓπ, initial_θ), ℓπ.allstd) = -415167
-# ┌ Info: Current Prior Log-likelihood : 
-# └   priorlogpdf(ℓπ, initial_θ) = -214.1825373360679
-# ┌ Info: Current MSE against dataset Log-likelihood :
-# └   L2LossData(ℓπ, initial_θ) = -109309.44973223892
-# Sampling 100%|███████████████████████████████| Time: 0:14:50
-#   iterations:                                   85
-#   ratio_divergent_transitions:                  0.0
-#   ratio_divergent_transitions_during_adaption:  0.02
-#   n_steps:                                      127
-#   is_accept:                                    true
-#   acceptance_rate:                              0.9982795867682919
-#   log_density:                                  -3832.934953640867
-#   hamiltonian_energy:                           4145.005901868316
-#   hamiltonian_energy_error:                     -0.07863051782624098
-#   max_hamiltonian_energy_error:                 -0.16790754244266282
-#   tree_depth:                                   7
-#   numerical_error:                              false
-#   step_size:                                    0.00018186972987192408
-#   nom_step_size:                                0.00018186972987192408
-#   is_adapt:                                     false
-#   mass_matrix:                                  DiagEuclideanMetric([1.0, 1.
-# [ Info: Sampling Complete.
-# ┌ Info: Current Physics Log-likelihood :
-# └   ℓπ.full_loglikelihood(setparameters(ℓπ, samples[end]), ℓπ.allstd) = -132
-# ┌ Info: Current Prior Log-likelihood :
-# └   priorlogpdf(ℓπ, samples[end]) = -219.17544656823006
-# ┌ Info: Current MSE against dataset Log-likelihood :
-# └   L2LossData(ℓπ, samples[end]) = -3481.509412470054
-
-# julia> sol1 = ahmc_bayesian_pinn_pde(pde_system,
-#            discretization;
-#            draw_samples = 90, Kernel = AdvancedHMC.NUTS(0.8),
-#            bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.7],
-#            phystd = [0.7], l2std = [0.2], param = [Distributions.Normal(2.0,
-#            priorsNNw = (0.0, 3.0),
-#            saveats = [1 / 100.0, 1 / 100.0],
-#            Dict_differentials = Dict_differentials,
-#            progress = true)
-# ┌ Info: Current Physics Log-likelihood : 
-# └   ℓπ.full_loglikelihood(setparameters(ℓπ, initial_θ), ℓπ.allstd) = -394622
-# ┌ Info: Current Prior Log-likelihood : 
-# └   priorlogpdf(ℓπ, initial_θ) = -214.1657203956881
-# ┌ Info: Current MSE against dataset Log-likelihood :
-# └   L2LossData(ℓπ, initial_θ) = -107600.2750860966
-# ┌ Info: Current L2_LOSSY : 
-# └   ℓπ.L2_loss2(setparameters(ℓπ, initial_θ), ℓπ.allstd) = -352.339686469935
-# Sampling 100%|███████████████████████████████| Time: 0:38:42
-#   iterations:                                   90
-#   ratio_divergent_transitions:                  0.24
-#   ratio_divergent_transitions_during_adaption:  0.02
-#   n_steps:                                      34
-#   is_accept:                                    true
-#   acceptance_rate:                              0.0755469536430885
-#   log_density:                                  -6535.135018473582
-#   hamiltonian_energy:                           6681.540376258076
-#   hamiltonian_energy_error:                     -1.7097735125544204
-#   max_hamiltonian_energy_error:                 1216.239238705054
-#   tree_depth:                                   5
-#   numerical_error:                              true
-#   step_size:                                    0.0004111092751764056
-#   nom_step_size:                                0.0004111092751764056
-#   is_adapt:                                     false
-#   mass_matrix:                                  DiagEuclideanMetric([1.0, 1.
-# [ Info: Sampling Complete.
-# ┌ Info: Current Physics Log-likelihood : 
-# └   ℓπ.full_loglikelihood(setparameters(ℓπ, samples[end]), ℓπ.allstd) = -272
-# ┌ Info: Current Prior Log-likelihood : 
-# └   priorlogpdf(ℓπ, samples[end]) = -218.6535874132563
-# ┌ Info: Current MSE against dataset Log-likelihood : 
-# └   L2LossData(ℓπ, samples[end]) = -3573.449092586736
-# ┌ Info: Current L2_LOSSY : 
-# └   ℓπ.L2_loss2(setparameters(ℓπ, samples[end]), ℓπ.allstd) = -2470.35523478
-
-using MCMCChains
-println(summarize(sol1.original.mcmc_chain))
-plot(sol1.original.mcmc_chain)
-
-sol3 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 100, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-    phystd = [0.7], l2std = [0.15], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 3.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    progress = true)
-
-sol4 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 80, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-    phystd = [0.7], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    progress = true)
-
-
-phi = discretization.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol1.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-
-phi = discretization.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol1_1.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1_1.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-
-phi = discretization.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol2.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol2.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-
-phi = discretization.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol3.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol3.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-
-phi = discretization.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol4.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol4.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-sol0_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 110, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.2],
-    phystd = [0.2], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-
-julia> sol5_new = ahmc_bayesian_pinn_pde(pde_system,
-           discretization_new;
-           draw_samples = 170, Kernel = AdvancedHMC.NUTS(0.8),
-           bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-           phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
-           priorsNNw = (0.0, 1.0),
-           saveats = [1 / 100.0, 1 / 100.0],
-           progress = true)
-
-sol1_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 110, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.2],
-    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-sol1_1_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 110, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew =  [0.5],
-    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-sol1_2_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 150, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew =  [0.1],
-    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 3.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-sol1_3_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 150, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew =  [0.2],
-    phystd = [0.3], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 3.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-sol2_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 140, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-    phystd = [0.2], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    progress = true)
-
-
-sol3_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 140, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    progress = true)
-
-sol4_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 160, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    progress = true)
-
-sol5_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 170, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    progress = true)
-
-# phi = discretization.phi[1]
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol0_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol0_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol1_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol1_1_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1_1_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol1_2_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1_2_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol1_3_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1_3_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol2_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol2_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol3_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol3_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol4_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol4_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol5_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol5_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol5_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol5_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-plotly()
-p1 = plot(ts, xs, u_predict, title = "predict")
-p2 = plot(ts, xs, u_real, title = "analytic")
-p3 = plot(ts, xs, diff_u, title = "error")
-plot(p1, p2, p3)
-# julia> sol0_new = ahmc_bayesian_pinn_pde(pde_system,
-#            discretization_new;
-#            draw_samples = 110, Kernel = AdvancedHMC.NUTS(0.8),
-#            bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.2],
-#            phystd = [0.2], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-#            priorsNNw = (0.0, 1.0),
-#            saveats = [1 / 100.0, 1 / 100.0],
-#            Dict_differentials = Dict_differentials,
-#            progress = true)
-# ┌ Info: Current Physics Log-likelihood :
-# └   ℓπ.full_loglikelihood(setparameters(ℓπ, initial_θ), ℓπ.allstd) = -398314.38213382766
-# ┌ Info: Current Prior Log-likelihood :
-# └   priorlogpdf(ℓπ, initial_θ) = -104.7365701596561
-# ┌ Info: Current MSE against dataset Log-likelihood :
-# └   L2LossData(ℓπ, initial_θ) = -58553.36940699288
-# ┌ Info: Current L2_LOSSY :
-# └   ℓπ.L2_loss2(setparameters(ℓπ, initial_θ), ℓπ.allstd) = -281.85131447737575
-# Sampling 100%|███████████████████████████████| Time: 0:26:00
-#   iterations:                                   110
-#   ratio_divergent_transitions:                  0.2
-#   ratio_divergent_transitions_during_adaption:  0.03
-#   n_steps:                                      11
-#   is_accept:                                    true
-#   acceptance_rate:                              0.0024891070448310416
-#   log_density:                                  -13158.729119075539
-#   hamiltonian_energy:                           13212.763613683248
-#   hamiltonian_energy_error:                     0.0
-#   max_hamiltonian_energy_error:                 1492.7356803165876
-#   tree_depth:                                   3
-#   numerical_error:                              true
-#   step_size:                                    0.0002145156661425442
-#   nom_step_size:                                0.0002145156661425442
-#   is_adapt:                                     false
-#   mass_matrix:                                  DiagEuclideanMetric([1.0, 1.0, 1.0, 1.0, 1.0, 1 ...])
-# [ Info: Sampling Complete.
-# ┌ Info: Current Physics Log-likelihood :
-# └   ℓπ.full_loglikelihood(setparameters(ℓπ, samples[end]), ℓπ.allstd) = -908.7769621441158
-# ┌ Info: Current Prior Log-likelihood :
-# └   priorlogpdf(ℓπ, samples[end]) = -136.87645881663929
-# ┌ Info: Current MSE against dataset Log-likelihood :
-# └   L2LossData(ℓπ, samples[end]) = -1404.7102059521355
-# ┌ Info: Current L2_LOSSY :
-# └   ℓπ.L2_loss2(setparameters(ℓπ, samples[end]), ℓπ.allstd) = -10708.363203924739
-
-# julia> sol2_new = ahmc_bayesian_pinn_pde(pde_system,
-#            discretization_new;
-#            draw_samples = 140, Kernel = AdvancedHMC.NUTS(0.8),
-#            bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-#            phystd = [0.2], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-#            priorsNNw = (0.0, 1.0),
-#            saveats = [1 / 100.0, 1 / 100.0],
-#            progress = true)
-# ┌ Info: Current Physics Log-likelihood :
-# └   ℓπ.full_loglikelihood(setparameters(ℓπ, initial_θ), ℓπ.allstd) = -397526.19267355377
-# ┌ Info: Current Prior Log-likelihood :
-# └   priorlogpdf(ℓπ, initial_θ) = -105.03439044100367
-# ┌ Info: Current MSE against dataset Log-likelihood :
-# └   L2LossData(ℓπ, initial_θ) = -60957.24454333089
-# Sampling  99%|███████████████████████████████|  ETA: 0:00:10
-#   iterations:                                   140
-#   ratio_divergent_transitions:                  0.0
-#   ratio_divergent_transitions_during_adaption:  0.01
-#   n_steps:                                      1023
-#   is_accept:                                    true
-#   acceptance_rate:                              0.972620625460237
-#   log_density:                                  -1513.1769839294327
-#   hamiltonian_energy:                           8709.204139640105
-#   hamiltonian_energy_error:                     -0.4925547801958601
-#   max_hamiltonian_energy_error:                 -1.7861646674082294
-#   tree_depth:                                   10
-#   numerical_error:                              false
-#   step_size:                                    0.00011428277138492957
-#   nom_step_size:                                0.00011428277138492957
-#   is_adapt:                                     false
-#   mass_matrix:                                  DiagEuclideanMetric([1.0, 1.0, 1.0, 1.0, 1.0, 1 ...])
-# [ Info: Sampling Complete.
-# ┌ Info: Current Physics Log-likelihood :
-# └   ℓπ.full_loglikelihood(setparameters(ℓπ, samples[end]), ℓπ.allstd) = 115.103823132341
-# ┌ Info: Current Prior Log-likelihood :
-# └   priorlogpdf(ℓπ, samples[end]) = -198.39103020815858
-# ┌ Info: Current MSE against dataset Log-likelihood :
-# └   L2LossData(ℓπ, samples[end]) = -1429.7843027541815
+@test sol1.estimated_de_params[1]≈param atol=param * 0.3
\ No newline at end of file
diff --git a/test/BPINN_pde_experimental.jl b/test/BPINN_pde_experimental.jl
new file mode 100644
index 0000000000..a8f4a0341e
--- /dev/null
+++ b/test/BPINN_pde_experimental.jl
@@ -0,0 +1,1669 @@
+using Test, MCMCChains, Lux, ModelingToolkit
+import ModelingToolkit: Interval, infimum, supremum
+using ForwardDiff, Distributions, OrdinaryDiffEq
+using AdvancedHMC, Statistics, Random, Functors
+using NeuralPDE, MonteCarloMeasurements
+using ComponentArrays, ModelingToolkit
+
+Random.seed!(100)
+
+# function required to use the new loss, creates a dicitonary of differntial operator terms
+function recur_expression(exp, Dict_differentials)
+    for in_exp in exp.args
+        if !(in_exp isa Expr)
+            # skip +,== symbols, characters etc
+            continue
+
+        elseif in_exp.args[1] isa ModelingToolkit.Differential
+            # first symbol of differential term
+            # Dict_differentials for masking differential terms
+            # and resubstituting differentials in equations after putting in interpolations
+            # temp = in_exp.args[end]
+            Dict_differentials[eval(in_exp)] = Symbolics.variable("diff_$(length(Dict_differentials) + 1)")
+            return
+        else
+            recur_expression(in_exp, Dict_differentials)
+        end
+    end
+end
+
+# experiments are here
+println("Example 3: Lotka Volterra with New parameter estimation")
+@parameters t α β γ δ
+@variables x(..) y(..)
+
+Dt = Differential(t)
+eqs = [Dt(x(t)) * α ~ x(t) - β * x(t) * y(t), Dt(y(t)) * δ ~ x(t) * y(t) - y(t) * γ]
+bcs = [x(0) ~ 1.0, y(0) ~ 1.0]
+domains = [t ∈ Interval(0.0, 7.0)]
+
+# Define the parameters' values
+# α, β, γ, δ = p
+
+# regular equations
+# dx = (1.5 - y) * x # prey
+# dy = (x - 3.0) * y # predator
+# p = [1.5, 1.0, 3.0, 1.0] non transformed values
+
+# transformed equations
+# dx*0.666 = (1 - 0.666 * y) * x # prey
+# dy*1.0 = (x - 3.0) * y # predator
+# p = [0.666, 0.666, 3.0, 1.0] transformed values (change is scale also ensured!)
+
+chainl = [
+    Lux.Chain(Lux.Dense(1, 5, sin), Lux.Dense(5, 5, sin), Lux.Dense(5, 1)),
+    Lux.Chain(Lux.Dense(1, 5, sin), Lux.Dense(5, 5, sin), Lux.Dense(5, 1))
+]
+
+initl, st = Lux.setup(Random.default_rng(), chainl[1])
+initl1, st1 = Lux.setup(Random.default_rng(), chainl[2])
+
+using NeuralPDE, Lux, OrdinaryDiffEq, Distributions, Random
+
+function lotka_volterra(u, p, t)
+    # Model parameters. 
+    α, β, γ, δ = p
+    # Current state.
+    x, y = u
+
+    # Evaluate differential equations.
+    dx = (α - β * y) * x # prey
+    dy = (δ * x - γ) * y # predator
+
+    return [dx, dy]
+end
+# initial-value problem.
+u0 = [1.0, 1.0]
+# p = [2/3, 2/3, 1/3.0, 1/3.0]
+p = [1.5, 1.0, 3.0, 1.0]
+tspan = (0.0, 7.0)
+prob = ODEProblem(lotka_volterra, u0, tspan, p)
+dt = 0.01
+solution = solve(prob, Tsit5(); saveat = dt)
+solution1 = solve(prob, Tsit5(); saveat = 0.02)
+
+function calculate_errors(approx_sol, solution_points)
+    # Check vector lengths match
+    if length(approx_sol) != length(solution_points)
+        error("Vectors must have the same length")
+    end
+
+    # Calculate errors
+    n = length(approx_sol)
+    errors = randn(n)
+    for i in 1:n
+        errors[i] = solution_points[i] - approx_sol[i]
+    end
+
+    # Calculate RMSE
+    rmse = sqrt(mean(errors .^ 2))
+
+    # Calculate MAE
+    mae = mean(abs.(errors))
+
+    # Calculate maximum absolute error
+    max_error = maximum(abs.(errors))
+
+    # Return dictionary with errors
+    return Dict(
+        "RMSE" => rmse,
+        "MAE" => mae,
+        "Max Abs Error" => max_error
+    )
+end
+u = hcat(solution1.u...)
+
+a1 = calculate_errors(pmean(sol6_1.ensemblesol[1]), u1[1, :])
+b1 = calculate_errors(pmean(sol6_1.ensemblesol[2]), u1[2, :])
+
+a = calculate_errors(pmean(sol6_2.ensemblesol[1]), u[1, :])
+b = calculate_errors(pmean(sol6_2.ensemblesol[2]), u[2, :])
+
+c = calculate_errors(pmean(sol6_L2_2.ensemblesol[1]), u[1, :])
+d = calculate_errors(pmean(sol6_L2_2.ensemblesol[2]), u[2, :])
+
+e = calculate_errors(pmean(sol6_L2_1.ensemblesol[1]), u[1, :])
+f = calculate_errors(pmean(sol6_L2_1.ensemblesol[2]), u[2, :])
+
+g = calculate_errors(pmean(sol6_L2.ensemblesol[1]), u[1, :])
+h = calculate_errors(pmean(sol6_L2.ensemblesol[2]), u[2, :])
+
+# function moving_average_smoothing(data::Vector{T}, window_size::Int) where {T}
+#     smoothed_data = similar(data, T, length(data))
+
+#     for i in 1:length(data)
+#         start_idx = max(1, i - window_size)
+#         end_idx = min(length(data), i + window_size)
+#         smoothed_data[i] = mean(data[start_idx:end_idx])
+#     end
+
+#     return smoothed_data'
+# end
+
+# Extract solution
+time = solution.t
+u = hcat(solution.u...)
+time1 = solution.t
+u_noisy = u .+ u .* (0.2 .* randn(size(u)))
+u_noisy0 = u .+ (3.0 .* rand(size(u)[1], size(u)[2]) .- 1.5)
+u_noisy1 = u .+ (0.8 .* randn(size(Array(solution))))
+u_noisy2 = u .+ (0.5 .* randn(size(Array(solution))))
+
+plot(time, u[1, :])
+plot!(time, u[2, :])
+scatter!(time1, u_noisy0[1, :])
+scatter!(time1, u_noisy0[2, :])
+scatter!(discretization_08_gaussian.dataset[1][1][:, 2],
+    discretization_08_gaussian.dataset[1][1][:, 1])
+scatter!(discretization_08_gaussian.dataset[1][2][:, 2],
+    discretization_08_gaussian.dataset[1][2][:, 1])
+
+scatter!(discretization_05_gaussian.dataset[1][1][:, 2],
+    discretization_05_gaussian.dataset[1][1][:, 1])
+scatter!(discretization_05_gaussian.dataset[1][2][:, 2],
+    discretization_05_gaussian.dataset[1][2][:, 1])
+# discretization_05_gaussian.dataset[1][1][:,2]
+# window_size = 5
+# smoothed_datasets = [moving_average_smoothing(u1[i, :], window_size)
+#                      for i in 1:length(solution.u[1])]
+# u2 = vcat(smoothed_datasets[1], smoothed_datasets[2])
+
+# Randomly select some points from the solution
+num_points = 100  # Number of points to select
+selected_indices = rand(1:size(u_noisy1, 2), num_points)
+upoints = [u_noisy1[:, i] for i in selected_indices]
+timepoints = [time[i] for i in selected_indices]
+temp = hcat(upoints...)
+dataset = [hcat(temp[i, :], timepoints) for i in 1:2]
+
+discretization_uniform = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
+    dataset = [dataset, nothing])
+discretization_08_gaussian = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
+    dataset = [dataset, nothing])
+discretization_05_gaussian = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
+    dataset = [dataset, nothing])
+
+discretization1 = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
+    dataset = [dataset, nothing])
+
+scatter!(discretization.dataset[1][1][:, 2], discretization.dataset[1][1][:, 1])
+scatter!(discretization.dataset[1][2][:, 2], discretization.dataset[1][2][:, 1])
+
+sol = solve(prob, Tsit5(); saveat = 0.1)
+odedata = Array(sol) + 0.8 * randn(size(Array(sol)))
+
+@named pde_system = PDESystem(eqs,
+    bcs,
+    domains,
+    [t],
+    [x(t), y(t)],
+    [α, β, γ, δ],
+    defaults = Dict([α => 2, β => 2, γ => 2, δ => 2]))
+
+# creating dictionary for masking equations
+eqs = pde_system.eqs
+Dict_differentials = Dict()
+exps = toexpr.(eqs)
+nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+sol3 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_uniform = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_uniform;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_08_gaussian = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_08_gaussian;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_05_gaussian = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_05_gaussian;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+# more iterations for above
+sol3_100_uniform_1000 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_uniform;
+    draw_samples = 1000,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_08_gaussian_1000 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_08_gaussian;
+    draw_samples = 1000,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_05_gaussian_1000 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_05_gaussian;
+    draw_samples = 1000,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+# more iterations for above + strict BC
+sol3_100_uniform_1000_bc = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_uniform;
+    draw_samples = 1000,
+    bcstd = [0.1, 0.1],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_08_gaussian_1000_bc = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_08_gaussian;
+    draw_samples = 1000,
+    bcstd = [0.1, 0.1],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_08_gaussian_1000_bc_hard = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_08_gaussian;
+    draw_samples = 1000,
+    bcstd = [0.05, 0.05],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_05_gaussian_1000_bc = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_05_gaussian;
+    draw_samples = 1000,
+    bcstd = [0.1, 0.1],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_08_gaussian_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_08_gaussian;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol3_100_05_gaussian_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_05_gaussian;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol4 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true
+)
+
+sol4_0 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true
+)
+
+sol5_00 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol5_0 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.2, 0.2],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol5 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+# 70 points in dataset
+sol6 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.2, 0.2],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+# SOL6_1 VS SOL6_L2
+sol6_1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol6_2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol6_2_L2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol6_3 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol6_4 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.2, 0.2],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol6_L2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol6_L2_1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol6_L2_2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.05, 0.05],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+# 50 datapoint 0-5 sol5 vs sol4
+# julia> sol4.estimated_de_params
+# 4-element Vector{Particles{Float64, 234}}:
+#  0.549 ± 0.0058
+#  0.71 ± 0.0042
+#  0.408 ± 0.0063
+#  0.355 ± 0.0015
+
+# julia> sol5.estimated_de_params
+# 4-element Vector{Particles{Float64, 234}}:
+#  0.604 ± 0.0052
+#  0.702 ± 0.0034
+#  0.346 ± 0.0037
+#  0.335 ± 0.0013
+
+# 100 datapoint 0-5 sol5_2 vs sol3
+# julia> sol3.estimated_de_params
+# 4-element Vector{Particles{Float64, 234}}:
+#  0.598 ± 0.0037
+#  0.711 ± 0.0027
+#  0.399 ± 0.0032
+#  0.333 ± 0.0011
+
+# julia> sol5_2.estimated_de_params
+# 4-element Vector{Particles{Float64, 234}}:
+#  0.604 ± 0.0035
+#  0.686 ± 0.0026
+#  0.395 ± 0.0029
+#  0.328 ± 0.00095
+
+# timespan for full dataset (0-8)
+sol6 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1, 2),
+        Normal(1, 1),
+        Normal(1, 2),
+        Normal(1, 1)
+    ], progress = true)
+
+sol5_3 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1, 2),
+        Normal(1, 1),
+        Normal(1, 2),
+        Normal(1, 1)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol5_4 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.2, 0.2],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1, 2),
+        Normal(1, 1),
+        Normal(1, 2),
+        Normal(1, 1)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol5_5 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1, 2),
+        Normal(1, 1),
+        Normal(1, 2),
+        Normal(1, 1)
+    ], progress = true
+)
+
+sol7 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1, 2),
+        Normal(1, 1),
+        Normal(1, 2),
+        Normal(1, 1)
+    ], Dict_differentials = Dict_differentials, progress = true)
+
+sol5_5_1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], progress = true
+)
+
+sol7_1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true)
+
+sol7_2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.1, 0.1],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true)
+
+sol7_3 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.2, 0.2],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true)
+
+sol7_4 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true)
+
+using Plots, StatsPlots
+plotly()
+plot(time, u[1, :])
+plot!(time, u[2, :])
+scatter!(time, u_noisy[1, :])
+scatter!(time, u_noisy[2, :])
+scatter!(discretization.dataset[1][1][:, 2], discretization.dataset[1][1][:, 1])
+scatter!(discretization.dataset[1][2][:, 2], discretization.dataset[1][2][:, 1])
+
+scatter!(discretization1.dataset[1][1][:, 2],
+    discretization1.dataset[1][1][:, 1], legend = nothing)
+scatter!(discretization1.dataset[1][2][:, 2], discretization1.dataset[1][2][:, 1])
+
+# plot28(sol4 seems better vs sol3 plots, params seems similar)
+plot!(sol3.timepoints[1]', sol3.ensemblesol[1])
+plot!(sol3.timepoints[2]', sol3.ensemblesol[2])
+plot!(sol3_0.timepoints[1]', sol3_0.ensemblesol[1])
+plot!(sol3_0.timepoints[2]', sol3_0.ensemblesol[2])
+
+plot!(sol4.timepoints[1]', sol4.ensemblesol[1])
+plot!(sol4.timepoints[2]', sol4.ensemblesol[2])
+plot!(sol4_0.timepoints[1]', sol4_0.ensemblesol[1])
+plot!(sol4_0.timepoints[2]', sol4_0.ensemblesol[2])
+
+plot!(sol4_2.timepoints[1]', sol4_2.ensemblesol[1], legend = nothing)
+plot!(sol4_2.timepoints[2]', sol4_2.ensemblesol[2])
+plot!(sol5_2.timepoints[1]', sol5_2.ensemblesol[1], legend = nothing)
+plot!(sol5_2.timepoints[2]', sol5_2.ensemblesol[2])
+
+plot!(sol4_3.timepoints[1]', sol4_3.ensemblesol[1], legend = nothing)
+plot!(sol4_3.timepoints[2]', sol4_3.ensemblesol[2])
+plot!(sol5_3.timepoints[1]', sol5_3.ensemblesol[1])
+plot!(sol5_3.timepoints[2]', sol5_3.ensemblesol[2])
+plot!(sol5_4.timepoints[1]', sol5_4.ensemblesol[1], legend = nothing)
+plot!(sol5_4.timepoints[2]', sol5_4.ensemblesol[2])
+
+# plot 36 sol4 vs sol5(params sol4 better, but plots sol5 "looks" better),plot 44(sol5 better than sol6 overall)
+plot!(sol5.timepoints[1]', sol5.ensemblesol[1], legend = nothing)
+plot!(sol5.timepoints[2]', sol5.ensemblesol[2])
+
+plot!(sol5_0.timepoints[1]', sol5_0.ensemblesol[1], legend = nothing)
+plot!(sol5_0.timepoints[2]', sol5_0.ensemblesol[2])
+
+plot!(sol5_00.timepoints[1]', sol5_00.ensemblesol[1], legend = nothing)
+plot!(sol5_00.timepoints[2]', sol5_00.ensemblesol[2])
+
+plot!(sol6.timepoints[1]', sol6.ensemblesol[1])
+plot!(sol6.timepoints[2]', sol6.ensemblesol[2])
+plot!(sol6_L2.timepoints[1]', sol6_L2.ensemblesol[1])
+plot!(sol6_L2.timepoints[2]', sol6_L2.ensemblesol[2])
+
+plot!(sol6_L2_1.timepoints[1]', sol6_L2_1.ensemblesol[1])
+plot!(sol6_L2_1.timepoints[2]', sol6_L2_1.ensemblesol[2])
+
+plot!(sol6_L2_2.timepoints[1]', sol6_L2_2.ensemblesol[1])
+plot!(sol6_L2_2.timepoints[2]', sol6_L2_2.ensemblesol[2])
+
+plot!(sol6_1.timepoints[1]', sol6_1.ensemblesol[1])
+plot!(sol6_1.timepoints[2]', sol6_1.ensemblesol[2])
+plot!(sol6_2.timepoints[1]', sol6_2.ensemblesol[1])
+plot!(sol6_2.timepoints[2]', sol6_2.ensemblesol[2], legend = nothing)
+plot!(sol6_2_L2.timepoints[1]', sol6_2_L2.ensemblesol[1])
+plot!(sol6_2_L2.timepoints[2]', sol6_2_L2.ensemblesol[2], legend = nothing)
+
+# plot52 sol7 vs sol5(sol5 overall better plots, params?)
+plot!(sol7.timepoints[1]', sol7.ensemblesol[1])
+plot!(sol7.timepoints[2]', sol7.ensemblesol[2])
+
+# sol8,sol8_2,sol9,sol9_2 bad
+plot!(sol8.timepoints[1]', sol8.ensemblesol[1])
+plot!(sol8.timepoints[2]', sol8.ensemblesol[2])
+plot!(sol8_2.timepoints[1]', sol8_2.ensemblesol[1])
+plot!(sol8_2.timepoints[2]', sol8_2.ensemblesol[2])
+
+plot!(sol9.timepoints[1]', sol9.ensemblesol[1])
+plot!(sol9.timepoints[2]', sol9.ensemblesol[2])
+plot!(sol9_2.timepoints[1]', sol9_2.ensemblesol[1])
+plot!(sol9_2.timepoints[2]', sol9_2.ensemblesol[2])
+
+plot!(sol5_5.timepoints[1]', sol5_5.ensemblesol[1])
+plot!(sol5_5.timepoints[2]', sol5_5.ensemblesol[2], legend = nothing)
+
+plot!(sol5_5_1.timepoints[1]', sol5_5_1.ensemblesol[1])
+plot!(sol5_5_1.timepoints[2]', sol5_5_1.ensemblesol[2], legend = nothing)
+plot!(sol7_1.timepoints[1]', sol7_1.ensemblesol[1])
+plot!(sol7_1.timepoints[2]', sol7_1.ensemblesol[2])
+
+plot!(sol7_4.timepoints[1]', sol7_4.ensemblesol[1])
+plot!(sol7_4.timepoints[2]', sol7_4.ensemblesol[2])
+
+plot!(sol5_2_1.timepoints[1]', sol5_2_1.ensemblesol[1], legend = nothing)
+plot!(sol5_2_1.timepoints[2]', sol5_2_1.ensemblesol[2])
+plot!(sol5_2_2.timepoints[1]', sol5_2_2.ensemblesol[1], legend = nothing)
+plot!(sol5_2_2.timepoints[2]', sol5_2_2.ensemblesol[2])
+
+plot!(sol5_0.timepoints[1]', sol5_0.ensemblesol[1])
+plot!(sol5_0.timepoints[2]', sol5_0.ensemblesol[2], legend = nothing)
+
+plot!(sol5_00.timepoints[1]', sol5_00.ensemblesol[1], legend = nothing)
+plot!(sol5_00.timepoints[2]', sol5_00.ensemblesol[2])
+
+plot!(sol3_0.timepoints[1]', sol3_0.ensemblesol[1])
+plot!(sol3_0.timepoints[2]', sol3_0.ensemblesol[2], legend = nothing)
+plot!(sol4_0.timepoints[1]', sol4_0.ensemblesol[1])
+plot!(sol4_0.timepoints[2]', sol4_0.ensemblesol[2], legend = nothing)
+
+plot!(sol3_100_05_gaussian.timepoints[1]', sol3_100_05_gaussian.ensemblesol[1])
+plot!(sol3_100_05_gaussian.timepoints[2]',
+    sol3_100_05_gaussian.ensemblesol[2], legend = nothing)
+
+plot!(sol3_100_05_gaussian_new.timepoints[1]', sol3_100_05_gaussian_new.ensemblesol[1])
+plot!(sol3_100_05_gaussian_new.timepoints[2]', sol3_100_05_gaussian_new.ensemblesol[2])
+
+plot!(sol3_100_08_gaussian.timepoints[1]', sol3_100_08_gaussian.ensemblesol[1])
+plot!(sol3_100_08_gaussian.timepoints[2]', sol3_100_08_gaussian.ensemblesol[2])
+
+plot!(sol3_100_08_gaussian_new.timepoints[1]', sol3_100_08_gaussian_new.ensemblesol[1])
+plot!(sol3_100_08_gaussian_new.timepoints[2]',
+    sol3_100_08_gaussian_new.ensemblesol[2], legend = nothing)
+
+plot!(sol3_100_uniform.timepoints[1]', sol3_100_uniform.ensemblesol[1])
+plot!(sol3_100_uniform.timepoints[2]', sol3_100_uniform.ensemblesol[2])
+
+plot!(sol3_100_08_gaussian_1000.timepoints[1]', sol3_100_08_gaussian_1000.ensemblesol[1])
+plot!(sol3_100_08_gaussian_1000.timepoints[2]', sol3_100_08_gaussian_1000.ensemblesol[2])
+
+plot!(sol3_100_05_gaussian_1000.timepoints[1]', sol3_100_05_gaussian_1000.ensemblesol[1])
+plot!(sol3_100_05_gaussian_1000.timepoints[2]', sol3_100_05_gaussian_1000.ensemblesol[2])
+
+plot!(sol3_100_uniform_1000.timepoints[1]', sol3_100_uniform_1000.ensemblesol[1])
+plot!(sol3_100_uniform_1000.timepoints[2]', sol3_100_uniform_1000.ensemblesol[2])
+
+plot!(sol3_100_08_gaussian_1000_bc.timepoints[1]',
+    sol3_100_08_gaussian_1000_bc.ensemblesol[1])
+plot!(sol3_100_08_gaussian_1000_bc.timepoints[2]',
+    sol3_100_08_gaussian_1000_bc.ensemblesol[2])
+
+# test with lower number of points
+# consider full range dataset case
+# combination of all above
+
+# run 1 100 iters
+sol5.estimated_de_params
+sol6.estimated_de_params
+
+# run 2 200 iters
+sol5.estimated_de_params
+sol6.estimated_de_params
+
+# run 2 200 iters
+sol3.estimated_de_params
+sol4.estimated_de_params
+
+# p = [2/3, 2/3, 1/3, 1/3]
+sol3.estimated_de_params
+sol4.estimated_de_params
+
+@parameters t, p
+@variables u(..)
+
+Dt = Differential(t)
+eqs = Dt(u(t)) - cos(p * t) ~ 0
+bcs = [u(0) ~ 0.0]
+domains = [t ∈ Interval(0.0, 2.0)]
+
+chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
+initl, st = Lux.setup(Random.default_rng(), chainl)
+
+@named pde_system = PDESystem(eqs,
+    bcs,
+    domains,
+    [t],
+    [u(t)],
+    [p],
+    defaults = Dict([p => 4.0]))
+
+analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
+timepoints = collect(0.0:(1 / 100.0):2.0)
+u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
+u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
+dataset = [hcat(u1, timepoints)]
+
+discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
+    dataset = [dataset, nothing])
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 1500,
+    bcstd = [0.05],
+    phystd = [0.01], l2std = [0.01],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 50.0],
+    param = [LogNormal(4.0, 2)], progress = true)
+
+param = 2 * π
+ts = vec(sol1.timepoints[1])
+u_real = [analytic_sol_func1(0.0, t) for t in ts]
+u_predict = pmean(sol1.ensemblesol[1])
+
+@test u_predict≈u_real atol=0.1
+@test mean(u_predict .- u_real) < 0.01
+@test sol1.estimated_de_params[1]≈param atol=0.1
+sol1.estimated_de_params[1]
+
+eqs = pde_system.eqs
+Dict_differentials = Dict()
+exps = toexpr.(eqs)
+nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+sol2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 1500,
+    bcstd = [0.05],
+    phystd = [0.01], l2std = [0.02], phystdnew = [0.02],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 50.0],
+    param = [LogNormal(4.0, 2)],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+param = 2 * π
+ts_2 = vec(sol2.timepoints[1])
+u_real_2 = [analytic_sol_func1(0.0, t) for t in ts]
+u_predict_2 = pmean(sol2.ensemblesol[1])
+
+@test u_predict_2≈u_real_2 atol=0.1
+@test mean(u_predict_2 .- u_real_2) < 0.01
+@test sol2.estimated_de_params[1]≈param atol=0.1
+sol2.estimated_de_params[1]
+
+plot(ts_2, u_predict_2)
+plot!(ts_2, u_real_2)
+
+@parameters t, σ_
+@variables x(..), y(..), z(..)
+Dt = Differential(t)
+eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
+    Dt(y(t)) ~ x(t) * (28.0 - z(t)) - y(t),
+    Dt(z(t)) ~ x(t) * y(t) - 8 / 3 * z(t)]
+
+bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
+domains = [t ∈ Interval(0.0, 1.0)]
+
+input_ = length(domains)
+n = 7
+chain = [
+    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+        Lux.Dense(n, 1)),
+    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+        Lux.Dense(n, 1)),
+    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+        Lux.Dense(n, 1))
+]
+
+#Generate Data
+function lorenz!(du, u, p, t)
+    du[1] = 10.0 * (u[2] - u[1])
+    du[2] = u[1] * (28.0 - u[3]) - u[2]
+    du[3] = u[1] * u[2] - (8 / 3) * u[3]
+end
+
+u0 = [1.0; 0.0; 0.0]
+tspan = (0.0, 1.0)
+prob = ODEProblem(lorenz!, u0, tspan)
+sol = solve(prob, Tsit5(), dt = 0.01, saveat = 0.05)
+ts = sol.t
+us = hcat(sol.u...)
+us = us .+ ((0.05 .* randn(size(us))) .* us)
+ts_ = hcat(sol(ts).t...)[1, :]
+dataset = [hcat(us[i, :], ts_) for i in 1:3]
+
+discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true,
+    dataset = [dataset, nothing])
+
+@named pde_system = PDESystem(eqs, bcs, domains,
+    [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]]))
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 100,
+    bcstd = [0.3, 0.3, 0.3],
+    phystd = [0.1, 0.1, 0.1],
+    l2std = [1, 1, 1],
+    priorsNNw = (0.0, 1.0),
+    saveats = [0.01],
+    param = [Normal(14.0, 2)], progress = true)
+
+idealp = 10.0
+p_ = sol1.estimated_de_params[1]
+@test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
+# @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
+
+@parameters x y
+@variables u(..)
+Dxx = Differential(x)^2
+Dyy = Differential(y)^2
+
+# 2D PDE
+eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y)
+
+# Boundary conditions
+bcs = [u(0, y) ~ 0.0, u(1, y) ~ 0.0,
+    u(x, 0) ~ 0.0, u(x, 1) ~ 0.0]
+
+# Space and time domains
+domains = [x ∈ Interval(0.0, 1.0),
+    y ∈ Interval(0.0, 1.0)]
+
+# Neural network
+dim = 2 # number of dimensions
+chain = Lux.Chain(Lux.Dense(dim, 9, Lux.σ), Lux.Dense(9, 9, Lux.σ), Lux.Dense(9, 1))
+
+# Discretization
+dx = 0.04
+discretization = BayesianPINN([chain], GridTraining(dx), dataset = [[dataset], nothing])
+
+@named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
+
+eqs = pde_system.eqs
+Dict_differentials = Dict()
+exps = toexpr.(eqs)
+nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 5,
+    bcstd = [0.01, 0.01, 0.01, 0.01],
+    phystd = [0.005],
+    priorsNNw = (0.0, 2.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+xs = sol1.timepoints[1]
+sol1.ensemblesol[1]
+analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2)
+
+dataset = hcat(u_real, xs')
+u_predict = pmean(sol1.ensemblesol[1])
+u_real = [analytic_sol_func(xs[:, i][1], xs[:, i][2]) for i in 1:length(xs[1, :])]
+@test u_predict≈u_real atol=0.8
+
+# KS EQUATION
+using NeuralPDE, Flux, Lux, ModelingToolkit, LinearAlgebra, AdvancedHMC
+import ModelingToolkit: Interval, infimum, supremum, Distributions
+using Plots, MonteCarloMeasurements, StatsPlots
+# plotly()
+
+@parameters x, t, α
+@variables u(..)
+Dt = Differential(t)
+Dx = Differential(x)
+Dx2 = Differential(x)^2
+Dx3 = Differential(x)^3
+Dx4 = Differential(x)^4
+
+# α = 1
+β = 4
+γ = 1
+eq = Dt(u(x, t)) + u(x, t) * Dx(u(x, t)) + α * Dx2(u(x, t)) + β * Dx3(u(x, t)) + γ * Dx4(u(x, t)) ~ 0
+
+u_analytic(x, t; z = -x / 2 + t) = 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
+du(x, t; z = -x / 2 + t) = 15 / 2 * (tanh(z) + 1) * (3 * tanh(z) - 1) * sech(z)^2
+
+bcs = [u(x, 0) ~ u_analytic(x, 0),
+    u(-10, t) ~ u_analytic(-10, t),
+    u(10, t) ~ u_analytic(10, t),
+    Dx(u(-10, t)) ~ du(-10, t),
+    Dx(u(10, t)) ~ du(10, t)]
+
+# Space and time domains
+domains = [x ∈ Interval(-10.0, 10.0),
+    t ∈ Interval(0.0, 1.0)]
+
+# Discretization
+dx = 0.4;
+dt = 0.2;
+
+# Function to compute analytical solution at a specific point (x, t)
+function u_analytic_point(x, t)
+    z = -x / 2 + t
+    return 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
+end
+
+# Function to generate the dataset matrix
+function generate_dataset_matrix(domains, dx, dt, xlim, tlim)
+    x_values = xlim[1]:dx:xlim[2]
+    t_values = tlim[1]:dt:tlim[2]
+
+    dataset = []
+
+    for t in t_values
+        for x in x_values
+            u_value = u_analytic_point(x, t)
+            push!(dataset, [u_value, x, t])
+        end
+    end
+
+    return vcat([data' for data in dataset]...)
+end
+
+datasetpde = [generate_dataset_matrix(domains, dx, dt, [-10, 10], [0.0, 1.0])]
+datasetpde_new = [generate_dataset_matrix(domains, dx, dt, [-10, 0], [0.0, 1.0])]
+
+# noise to dataset
+noisydataset = deepcopy(datasetpde)
+noisydataset[1][:, 1] = noisydataset[1][:, 1] .+ (randn(size(noisydataset[1][:, 1])) .* 0.8)
+
+noisydataset_new = deepcopy(datasetpde_new)
+noisydataset_new[1][:, 1] = noisydataset_new[1][:, 1] .+
+                            (randn(size(noisydataset_new[1][:, 1])) .* 0.8)
+
+plot(datasetpde[1][:, 2], datasetpde[1][:, 1], title = "Dataset from Analytical Solution")
+scatter!(noisydataset[1][:, 2], noisydataset[1][:, 1])
+
+plot(datasetpde[1][:, 2], datasetpde[1][:, 3], datasetpde[1][:, 1],
+    title = "Dataset from Analytical Solution")
+scatter!(noisydataset[1][:, 2], noisydataset[1][:, 3], noisydataset[1][:, 1])
+
+plot(datasetpde_new[1][:, 2], datasetpde_new[1][:, 1],
+    title = "Dataset from Analytical Solution")
+scatter!(noisydataset_new[1][:, 2], noisydataset_new[1][:, 1])
+
+plot(datasetpde_new[1][:, 2], datasetpde_new[1][:, 3],
+    datasetpde_new[1][:, 1], title = "Dataset from Analytical Solution")
+scatter!(noisydataset_new[1][:, 2], noisydataset_new[1][:, 3], noisydataset_new[1][:, 1])
+
+noise_std = 1.4
+original_data = datasetpde[1][:, 1]
+original_std = std(original_data)
+ratio = noise_std / original_std
+
+# Neural network
+chain = Lux.Chain(Lux.Dense(2, 8, Lux.tanh),
+    Lux.Dense(8, 8, Lux.tanh),
+    Lux.Dense(8, 1))
+
+discretization = NeuralPDE.BayesianPINN([chain],
+    GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset, nothing])
+
+discretization_new = NeuralPDE.BayesianPINN([chain],
+    GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset_new, nothing])
+
+@named pde_system = PDESystem(eq,
+    bcs,
+    domains,
+    [x, t],
+    [u(x, t)],
+    [α],
+    defaults = Dict([α => 2.0]))
+
+eqs = pde_system.eqs
+Dict_differentials = Dict()
+exps = toexpr.(eqs)
+nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 80, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.5],
+    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+sol1_1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 90, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.7],
+    phystd = [0.7], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+sol2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 80, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    progress = true)
+
+sol3 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 100, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+    phystd = [0.7], l2std = [0.15], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 3.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    progress = true)
+
+sol4 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 80, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+    phystd = [0.7], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    progress = true)
+
+phi = discretization.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol1.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+phi = discretization.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol1_1.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1_1.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+phi = discretization.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol2.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol2.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+phi = discretization.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol3.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol3.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+phi = discretization.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol4.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol4.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+sol0_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 110, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.2],
+    phystd = [0.2], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+julia > sol5_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 170, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    progress = true)
+
+sol1_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 110, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.2],
+    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+sol1_1_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 110, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.5],
+    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+sol1_2_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 150, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.1],
+    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 3.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+sol1_3_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 150, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.2],
+    phystd = [0.3], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 3.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+sol2_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 140, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+    phystd = [0.2], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    progress = true)
+
+sol3_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 140, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    progress = true)
+
+sol4_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 160, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    progress = true)
+
+sol5_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 170, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    progress = true)
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol0_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) -
+               first(pmean(phi([x, t], sol0_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol1_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) -
+               first(pmean(phi([x, t], sol1_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol1_1_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) -
+               first(pmean(phi([x, t], sol1_1_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol1_2_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) -
+               first(pmean(phi([x, t], sol1_2_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol1_3_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) -
+               first(pmean(phi([x, t], sol1_3_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol2_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) -
+               first(pmean(phi([x, t], sol2_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol3_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) -
+               first(pmean(phi([x, t], sol3_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol4_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) -
+               first(pmean(phi([x, t], sol4_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol5_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) -
+               first(pmean(phi([x, t], sol5_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol5_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) -
+               first(pmean(phi([x, t], sol5_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+p1 = plot(ts, xs, u_predict, title = "predict")
+p2 = plot(ts, xs, u_real, title = "analytic")
+p3 = plot(ts, xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+# MCMC chain analysis
+plot(sol1.original.mcmc_chain)
+plot(sol2.original.mcmc_chain)
+
+plot(sol0_new.original.mcmc_chain)
+plot(sol2_new.original.mcmc_chain)
+
+plot(sol1.original.mcmc_chain)
+meanplot(sol1.original.mcmc_chain)
+autocorplot(sol1.original.mcmc_chain)
+traceplot(sol1.original.mcmc_chain)
+
+plot(sol2.original.mcmc_chain)
+meanplot(sol2.original.mcmc_chain)
+autocorplot(sol2.original.mcmc_chain)
+traceplot(sol2.original.mcmc_chain)
+
+plot(sol0_new.original.mcmc_chain)
+meanplot(sol0_new.original.mcmc_chain)
+autocorplot(sol0_new.original.mcmc_chain)
+
+plot(sol2_new.original.mcmc_chain)
+meanplot(sol2_new.original.mcmc_chain)
+autocorplot(sol2_new.original.mcmc_chain)
+
+plot(sol3_new.original.mcmc_chain)
+meanplot(sol3_new.original.mcmc_chain)
+autocorplot(sol3_new.original.mcmc_chain)
\ No newline at end of file
diff --git a/test/bpinnexperimental.jl b/test/bpinnexperimental.jl
deleted file mode 100644
index a8a389ad44..0000000000
--- a/test/bpinnexperimental.jl
+++ /dev/null
@@ -1,140 +0,0 @@
-using Test, MCMCChains
-using ForwardDiff, Distributions, OrdinaryDiffEq
-using Flux, OptimizationOptimisers, AdvancedHMC, Lux
-using Statistics, Random, Functors, ComponentArrays
-using NeuralPDE, MonteCarloMeasurements
-
-Random.seed!(110)
-
-using NeuralPDE, Lux, Plots, OrdinaryDiffEq, Distributions, Random
-
-function lotka_volterra(u, p, t)
-    # Model parameters.
-    α, β, γ, δ = p
-    # Current state.
-    x, y = u
-
-    # Evaluate differential equations.
-    dx = (α - β * y) * x # prey
-    dy = (δ * x - γ) * y # predator
-
-    return [dx, dy]
-end
-
-# initial-value problem.
-u0 = [1.0, 1.0]
-p = [1.5, 1.0, 3.0, 1.0]
-tspan = (0.0, 4.0)
-prob = ODEProblem(lotka_volterra, u0, tspan, p)
-
-# Solve using OrdinaryDiffEq.jl solver
-dt = 0.2
-solution = solve(prob, Tsit5(); saveat = dt)
-
-times = solution.t
-u = hcat(solution.u...)
-x = u[1, :] + (u[1, :]) .* (0.3 .* randn(length(u[1, :])))
-y = u[2, :] + (u[2, :]) .* (0.3 .* randn(length(u[2, :])))
-dataset = [x, y, times]
-
-plot(times, x, label = "noisy x")
-plot!(times, y, label = "noisy y")
-plot!(solution, labels = ["x" "y"])
-
-chain = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),
-    Lux.Dense(6, 2))
-
-alg1 = BNNODE(chain;
-    dataset = dataset,
-    draw_samples = 1000,
-    l2std = [0.1, 0.1],
-    phystd = [0.1, 0.1],
-    priorsNNw = (0.0, 3.0),
-    param = [
-        Normal(1, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(0, 2)], progress = true)
-
-alg2 = BNNODE(chain;
-    dataset = dataset,
-    draw_samples = 1000,
-    l2std = [0.1, 0.1],
-    phystd = [0.1, 0.1],
-    priorsNNw = (0.0, 3.0),
-    param = [
-        Normal(1, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(0, 2)], estim_collocate = true, progress = true)
-
-@time sol_pestim1 = solve(prob, alg1; saveat = dt)
-@time sol_pestim2 = solve(prob, alg2; saveat = dt)
-plot(times, sol_pestim1.ensemblesol[1], label = "estimated x1")
-plot!(times, sol_pestim2.ensemblesol[1], label = "estimated x2")
-plot!(times, sol_pestim1.ensemblesol[2], label = "estimated y1")
-plot!(times, sol_pestim2.ensemblesol[2], label = "estimated y2")
-
-# comparing it with the original solution
-plot!(solution, labels = ["true x" "true y"])
-
-@show sol_pestim1.estimated_de_params
-@show sol_pestim2.estimated_de_params
-
-function fitz(u, p, t)
-    v, w = u[1], u[2]
-    a, b, τinv, l = p[1], p[2], p[3], p[4]
-
-    dv = v - 0.33 * v^3 - w + l
-    dw = τinv * (v + a - b * w)
-
-    return [dv, dw]
-end
-
-prob_ode_fitzhughnagumo = ODEProblem(
-    fitz, [1.0, 1.0], (0.0, 10.0), [0.7, 0.8, 1 / 12.5, 0.5])
-dt = 0.5
-sol = solve(prob_ode_fitzhughnagumo, Tsit5(), saveat = dt)
-
-sig = 0.20
-data = Array(sol)
-dataset = [data[1, :] .+ (sig .* rand(length(sol.t))),
-    data[2, :] .+ (sig .* rand(length(sol.t))), sol.t]
-priors = [Normal(0.5, 1.0), Normal(0.5, 1.0), Normal(0.0, 0.5), Normal(0.5, 1.0)]
-
-plot(sol.t, dataset[1], label = "noisy x")
-plot!(sol.t, dataset[2], label = "noisy y")
-plot!(sol, labels = ["x" "y"])
-
-chain = Lux.Chain(Lux.Dense(1, 10, tanh), Lux.Dense(10, 10, tanh),
-    Lux.Dense(10, 2))
-
-Adaptorkwargs = (Adaptor = AdvancedHMC.StanHMCAdaptor,
-    Metric = AdvancedHMC.DiagEuclideanMetric, targetacceptancerate = 0.8)
-alg1 = BNNODE(chain;
-dataset = dataset,
-draw_samples = 1000,
-l2std = [0.1, 0.1],
-phystd = [0.1, 0.1],
-priorsNNw = (0.01, 3.0),
-Adaptorkwargs = Adaptorkwargs,
-param = priors, progress = true)
-
-alg2 = BNNODE(chain;
-    dataset = dataset,
-    draw_samples = 1000,
-    l2std = [0.1, 0.1],
-    phystd = [0.1, 0.1],
-    priorsNNw = (0.01, 3.0),
-    Adaptorkwargs = Adaptorkwargs,
-    param = priors, estim_collocate = true, progress = true)
-
-@time sol_pestim3 = solve(prob_ode_fitzhughnagumo, alg1; saveat = dt)
-@time sol_pestim4 = solve(prob_ode_fitzhughnagumo, alg2; saveat = dt)
-plot!(sol.t, sol_pestim3.ensemblesol[1], label = "estimated x1")
-plot!(sol.t, sol_pestim4.ensemblesol[1], label = "estimated x2")
-plot!(sol.t, sol_pestim3.ensemblesol[2], label = "estimated y1")
-plot!(sol.t, sol_pestim4.ensemblesol[2], label = "estimated y2")
-
-@show sol_pestim3.estimated_de_params
-@show sol_pestim4.estimated_de_params

From 94770d4a96fc9ee15deaa0ce8296adafce69aa08 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Sun, 12 May 2024 11:13:18 +0530
Subject: [PATCH 045/107] need PDE exp file to be concise

---
 src/NeuralPDE.jl               |    1 -
 test/BPINN_pde_experimental.jl | 1669 --------------------------------
 2 files changed, 1670 deletions(-)
 delete mode 100644 test/BPINN_pde_experimental.jl

diff --git a/src/NeuralPDE.jl b/src/NeuralPDE.jl
index 920387340a..1122afc838 100644
--- a/src/NeuralPDE.jl
+++ b/src/NeuralPDE.jl
@@ -54,7 +54,6 @@ include("advancedHMC_MCMC.jl")
 include("BPINN_ode.jl")
 include("PDE_BPINN.jl")
 include("dgm.jl")
-include("collocated_estim.jl")
 
 export NNODE, NNDAE,
        PhysicsInformedNN, discretize,
diff --git a/test/BPINN_pde_experimental.jl b/test/BPINN_pde_experimental.jl
deleted file mode 100644
index a8f4a0341e..0000000000
--- a/test/BPINN_pde_experimental.jl
+++ /dev/null
@@ -1,1669 +0,0 @@
-using Test, MCMCChains, Lux, ModelingToolkit
-import ModelingToolkit: Interval, infimum, supremum
-using ForwardDiff, Distributions, OrdinaryDiffEq
-using AdvancedHMC, Statistics, Random, Functors
-using NeuralPDE, MonteCarloMeasurements
-using ComponentArrays, ModelingToolkit
-
-Random.seed!(100)
-
-# function required to use the new loss, creates a dicitonary of differntial operator terms
-function recur_expression(exp, Dict_differentials)
-    for in_exp in exp.args
-        if !(in_exp isa Expr)
-            # skip +,== symbols, characters etc
-            continue
-
-        elseif in_exp.args[1] isa ModelingToolkit.Differential
-            # first symbol of differential term
-            # Dict_differentials for masking differential terms
-            # and resubstituting differentials in equations after putting in interpolations
-            # temp = in_exp.args[end]
-            Dict_differentials[eval(in_exp)] = Symbolics.variable("diff_$(length(Dict_differentials) + 1)")
-            return
-        else
-            recur_expression(in_exp, Dict_differentials)
-        end
-    end
-end
-
-# experiments are here
-println("Example 3: Lotka Volterra with New parameter estimation")
-@parameters t α β γ δ
-@variables x(..) y(..)
-
-Dt = Differential(t)
-eqs = [Dt(x(t)) * α ~ x(t) - β * x(t) * y(t), Dt(y(t)) * δ ~ x(t) * y(t) - y(t) * γ]
-bcs = [x(0) ~ 1.0, y(0) ~ 1.0]
-domains = [t ∈ Interval(0.0, 7.0)]
-
-# Define the parameters' values
-# α, β, γ, δ = p
-
-# regular equations
-# dx = (1.5 - y) * x # prey
-# dy = (x - 3.0) * y # predator
-# p = [1.5, 1.0, 3.0, 1.0] non transformed values
-
-# transformed equations
-# dx*0.666 = (1 - 0.666 * y) * x # prey
-# dy*1.0 = (x - 3.0) * y # predator
-# p = [0.666, 0.666, 3.0, 1.0] transformed values (change is scale also ensured!)
-
-chainl = [
-    Lux.Chain(Lux.Dense(1, 5, sin), Lux.Dense(5, 5, sin), Lux.Dense(5, 1)),
-    Lux.Chain(Lux.Dense(1, 5, sin), Lux.Dense(5, 5, sin), Lux.Dense(5, 1))
-]
-
-initl, st = Lux.setup(Random.default_rng(), chainl[1])
-initl1, st1 = Lux.setup(Random.default_rng(), chainl[2])
-
-using NeuralPDE, Lux, OrdinaryDiffEq, Distributions, Random
-
-function lotka_volterra(u, p, t)
-    # Model parameters. 
-    α, β, γ, δ = p
-    # Current state.
-    x, y = u
-
-    # Evaluate differential equations.
-    dx = (α - β * y) * x # prey
-    dy = (δ * x - γ) * y # predator
-
-    return [dx, dy]
-end
-# initial-value problem.
-u0 = [1.0, 1.0]
-# p = [2/3, 2/3, 1/3.0, 1/3.0]
-p = [1.5, 1.0, 3.0, 1.0]
-tspan = (0.0, 7.0)
-prob = ODEProblem(lotka_volterra, u0, tspan, p)
-dt = 0.01
-solution = solve(prob, Tsit5(); saveat = dt)
-solution1 = solve(prob, Tsit5(); saveat = 0.02)
-
-function calculate_errors(approx_sol, solution_points)
-    # Check vector lengths match
-    if length(approx_sol) != length(solution_points)
-        error("Vectors must have the same length")
-    end
-
-    # Calculate errors
-    n = length(approx_sol)
-    errors = randn(n)
-    for i in 1:n
-        errors[i] = solution_points[i] - approx_sol[i]
-    end
-
-    # Calculate RMSE
-    rmse = sqrt(mean(errors .^ 2))
-
-    # Calculate MAE
-    mae = mean(abs.(errors))
-
-    # Calculate maximum absolute error
-    max_error = maximum(abs.(errors))
-
-    # Return dictionary with errors
-    return Dict(
-        "RMSE" => rmse,
-        "MAE" => mae,
-        "Max Abs Error" => max_error
-    )
-end
-u = hcat(solution1.u...)
-
-a1 = calculate_errors(pmean(sol6_1.ensemblesol[1]), u1[1, :])
-b1 = calculate_errors(pmean(sol6_1.ensemblesol[2]), u1[2, :])
-
-a = calculate_errors(pmean(sol6_2.ensemblesol[1]), u[1, :])
-b = calculate_errors(pmean(sol6_2.ensemblesol[2]), u[2, :])
-
-c = calculate_errors(pmean(sol6_L2_2.ensemblesol[1]), u[1, :])
-d = calculate_errors(pmean(sol6_L2_2.ensemblesol[2]), u[2, :])
-
-e = calculate_errors(pmean(sol6_L2_1.ensemblesol[1]), u[1, :])
-f = calculate_errors(pmean(sol6_L2_1.ensemblesol[2]), u[2, :])
-
-g = calculate_errors(pmean(sol6_L2.ensemblesol[1]), u[1, :])
-h = calculate_errors(pmean(sol6_L2.ensemblesol[2]), u[2, :])
-
-# function moving_average_smoothing(data::Vector{T}, window_size::Int) where {T}
-#     smoothed_data = similar(data, T, length(data))
-
-#     for i in 1:length(data)
-#         start_idx = max(1, i - window_size)
-#         end_idx = min(length(data), i + window_size)
-#         smoothed_data[i] = mean(data[start_idx:end_idx])
-#     end
-
-#     return smoothed_data'
-# end
-
-# Extract solution
-time = solution.t
-u = hcat(solution.u...)
-time1 = solution.t
-u_noisy = u .+ u .* (0.2 .* randn(size(u)))
-u_noisy0 = u .+ (3.0 .* rand(size(u)[1], size(u)[2]) .- 1.5)
-u_noisy1 = u .+ (0.8 .* randn(size(Array(solution))))
-u_noisy2 = u .+ (0.5 .* randn(size(Array(solution))))
-
-plot(time, u[1, :])
-plot!(time, u[2, :])
-scatter!(time1, u_noisy0[1, :])
-scatter!(time1, u_noisy0[2, :])
-scatter!(discretization_08_gaussian.dataset[1][1][:, 2],
-    discretization_08_gaussian.dataset[1][1][:, 1])
-scatter!(discretization_08_gaussian.dataset[1][2][:, 2],
-    discretization_08_gaussian.dataset[1][2][:, 1])
-
-scatter!(discretization_05_gaussian.dataset[1][1][:, 2],
-    discretization_05_gaussian.dataset[1][1][:, 1])
-scatter!(discretization_05_gaussian.dataset[1][2][:, 2],
-    discretization_05_gaussian.dataset[1][2][:, 1])
-# discretization_05_gaussian.dataset[1][1][:,2]
-# window_size = 5
-# smoothed_datasets = [moving_average_smoothing(u1[i, :], window_size)
-#                      for i in 1:length(solution.u[1])]
-# u2 = vcat(smoothed_datasets[1], smoothed_datasets[2])
-
-# Randomly select some points from the solution
-num_points = 100  # Number of points to select
-selected_indices = rand(1:size(u_noisy1, 2), num_points)
-upoints = [u_noisy1[:, i] for i in selected_indices]
-timepoints = [time[i] for i in selected_indices]
-temp = hcat(upoints...)
-dataset = [hcat(temp[i, :], timepoints) for i in 1:2]
-
-discretization_uniform = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
-    dataset = [dataset, nothing])
-discretization_08_gaussian = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
-    dataset = [dataset, nothing])
-discretization_05_gaussian = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
-    dataset = [dataset, nothing])
-
-discretization1 = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
-    dataset = [dataset, nothing])
-
-scatter!(discretization.dataset[1][1][:, 2], discretization.dataset[1][1][:, 1])
-scatter!(discretization.dataset[1][2][:, 2], discretization.dataset[1][2][:, 1])
-
-sol = solve(prob, Tsit5(); saveat = 0.1)
-odedata = Array(sol) + 0.8 * randn(size(Array(sol)))
-
-@named pde_system = PDESystem(eqs,
-    bcs,
-    domains,
-    [t],
-    [x(t), y(t)],
-    [α, β, γ, δ],
-    defaults = Dict([α => 2, β => 2, γ => 2, δ => 2]))
-
-# creating dictionary for masking equations
-eqs = pde_system.eqs
-Dict_differentials = Dict()
-exps = toexpr.(eqs)
-nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-sol3 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_uniform = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_uniform;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_08_gaussian = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_08_gaussian;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_05_gaussian = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_05_gaussian;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-# more iterations for above
-sol3_100_uniform_1000 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_uniform;
-    draw_samples = 1000,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_08_gaussian_1000 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_08_gaussian;
-    draw_samples = 1000,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_05_gaussian_1000 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_05_gaussian;
-    draw_samples = 1000,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-# more iterations for above + strict BC
-sol3_100_uniform_1000_bc = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_uniform;
-    draw_samples = 1000,
-    bcstd = [0.1, 0.1],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_08_gaussian_1000_bc = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_08_gaussian;
-    draw_samples = 1000,
-    bcstd = [0.1, 0.1],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_08_gaussian_1000_bc_hard = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_08_gaussian;
-    draw_samples = 1000,
-    bcstd = [0.05, 0.05],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_05_gaussian_1000_bc = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_05_gaussian;
-    draw_samples = 1000,
-    bcstd = [0.1, 0.1],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_08_gaussian_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_08_gaussian;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol3_100_05_gaussian_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_05_gaussian;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol4 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true
-)
-
-sol4_0 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true
-)
-
-sol5_00 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol5_0 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.2, 0.2],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol5 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-# 70 points in dataset
-sol6 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.2, 0.2],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-# SOL6_1 VS SOL6_L2
-sol6_1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol6_2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol6_2_L2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol6_3 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol6_4 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.2, 0.2],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol6_L2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol6_L2_1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol6_L2_2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.05, 0.05],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-# 50 datapoint 0-5 sol5 vs sol4
-# julia> sol4.estimated_de_params
-# 4-element Vector{Particles{Float64, 234}}:
-#  0.549 ± 0.0058
-#  0.71 ± 0.0042
-#  0.408 ± 0.0063
-#  0.355 ± 0.0015
-
-# julia> sol5.estimated_de_params
-# 4-element Vector{Particles{Float64, 234}}:
-#  0.604 ± 0.0052
-#  0.702 ± 0.0034
-#  0.346 ± 0.0037
-#  0.335 ± 0.0013
-
-# 100 datapoint 0-5 sol5_2 vs sol3
-# julia> sol3.estimated_de_params
-# 4-element Vector{Particles{Float64, 234}}:
-#  0.598 ± 0.0037
-#  0.711 ± 0.0027
-#  0.399 ± 0.0032
-#  0.333 ± 0.0011
-
-# julia> sol5_2.estimated_de_params
-# 4-element Vector{Particles{Float64, 234}}:
-#  0.604 ± 0.0035
-#  0.686 ± 0.0026
-#  0.395 ± 0.0029
-#  0.328 ± 0.00095
-
-# timespan for full dataset (0-8)
-sol6 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1, 2),
-        Normal(1, 1),
-        Normal(1, 2),
-        Normal(1, 1)
-    ], progress = true)
-
-sol5_3 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1, 2),
-        Normal(1, 1),
-        Normal(1, 2),
-        Normal(1, 1)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol5_4 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.2, 0.2],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1, 2),
-        Normal(1, 1),
-        Normal(1, 2),
-        Normal(1, 1)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol5_5 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1, 2),
-        Normal(1, 1),
-        Normal(1, 2),
-        Normal(1, 1)
-    ], progress = true
-)
-
-sol7 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1, 2),
-        Normal(1, 1),
-        Normal(1, 2),
-        Normal(1, 1)
-    ], Dict_differentials = Dict_differentials, progress = true)
-
-sol5_5_1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], progress = true
-)
-
-sol7_1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true)
-
-sol7_2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.1, 0.1],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true)
-
-sol7_3 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.2, 0.2],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true)
-
-sol7_4 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true)
-
-using Plots, StatsPlots
-plotly()
-plot(time, u[1, :])
-plot!(time, u[2, :])
-scatter!(time, u_noisy[1, :])
-scatter!(time, u_noisy[2, :])
-scatter!(discretization.dataset[1][1][:, 2], discretization.dataset[1][1][:, 1])
-scatter!(discretization.dataset[1][2][:, 2], discretization.dataset[1][2][:, 1])
-
-scatter!(discretization1.dataset[1][1][:, 2],
-    discretization1.dataset[1][1][:, 1], legend = nothing)
-scatter!(discretization1.dataset[1][2][:, 2], discretization1.dataset[1][2][:, 1])
-
-# plot28(sol4 seems better vs sol3 plots, params seems similar)
-plot!(sol3.timepoints[1]', sol3.ensemblesol[1])
-plot!(sol3.timepoints[2]', sol3.ensemblesol[2])
-plot!(sol3_0.timepoints[1]', sol3_0.ensemblesol[1])
-plot!(sol3_0.timepoints[2]', sol3_0.ensemblesol[2])
-
-plot!(sol4.timepoints[1]', sol4.ensemblesol[1])
-plot!(sol4.timepoints[2]', sol4.ensemblesol[2])
-plot!(sol4_0.timepoints[1]', sol4_0.ensemblesol[1])
-plot!(sol4_0.timepoints[2]', sol4_0.ensemblesol[2])
-
-plot!(sol4_2.timepoints[1]', sol4_2.ensemblesol[1], legend = nothing)
-plot!(sol4_2.timepoints[2]', sol4_2.ensemblesol[2])
-plot!(sol5_2.timepoints[1]', sol5_2.ensemblesol[1], legend = nothing)
-plot!(sol5_2.timepoints[2]', sol5_2.ensemblesol[2])
-
-plot!(sol4_3.timepoints[1]', sol4_3.ensemblesol[1], legend = nothing)
-plot!(sol4_3.timepoints[2]', sol4_3.ensemblesol[2])
-plot!(sol5_3.timepoints[1]', sol5_3.ensemblesol[1])
-plot!(sol5_3.timepoints[2]', sol5_3.ensemblesol[2])
-plot!(sol5_4.timepoints[1]', sol5_4.ensemblesol[1], legend = nothing)
-plot!(sol5_4.timepoints[2]', sol5_4.ensemblesol[2])
-
-# plot 36 sol4 vs sol5(params sol4 better, but plots sol5 "looks" better),plot 44(sol5 better than sol6 overall)
-plot!(sol5.timepoints[1]', sol5.ensemblesol[1], legend = nothing)
-plot!(sol5.timepoints[2]', sol5.ensemblesol[2])
-
-plot!(sol5_0.timepoints[1]', sol5_0.ensemblesol[1], legend = nothing)
-plot!(sol5_0.timepoints[2]', sol5_0.ensemblesol[2])
-
-plot!(sol5_00.timepoints[1]', sol5_00.ensemblesol[1], legend = nothing)
-plot!(sol5_00.timepoints[2]', sol5_00.ensemblesol[2])
-
-plot!(sol6.timepoints[1]', sol6.ensemblesol[1])
-plot!(sol6.timepoints[2]', sol6.ensemblesol[2])
-plot!(sol6_L2.timepoints[1]', sol6_L2.ensemblesol[1])
-plot!(sol6_L2.timepoints[2]', sol6_L2.ensemblesol[2])
-
-plot!(sol6_L2_1.timepoints[1]', sol6_L2_1.ensemblesol[1])
-plot!(sol6_L2_1.timepoints[2]', sol6_L2_1.ensemblesol[2])
-
-plot!(sol6_L2_2.timepoints[1]', sol6_L2_2.ensemblesol[1])
-plot!(sol6_L2_2.timepoints[2]', sol6_L2_2.ensemblesol[2])
-
-plot!(sol6_1.timepoints[1]', sol6_1.ensemblesol[1])
-plot!(sol6_1.timepoints[2]', sol6_1.ensemblesol[2])
-plot!(sol6_2.timepoints[1]', sol6_2.ensemblesol[1])
-plot!(sol6_2.timepoints[2]', sol6_2.ensemblesol[2], legend = nothing)
-plot!(sol6_2_L2.timepoints[1]', sol6_2_L2.ensemblesol[1])
-plot!(sol6_2_L2.timepoints[2]', sol6_2_L2.ensemblesol[2], legend = nothing)
-
-# plot52 sol7 vs sol5(sol5 overall better plots, params?)
-plot!(sol7.timepoints[1]', sol7.ensemblesol[1])
-plot!(sol7.timepoints[2]', sol7.ensemblesol[2])
-
-# sol8,sol8_2,sol9,sol9_2 bad
-plot!(sol8.timepoints[1]', sol8.ensemblesol[1])
-plot!(sol8.timepoints[2]', sol8.ensemblesol[2])
-plot!(sol8_2.timepoints[1]', sol8_2.ensemblesol[1])
-plot!(sol8_2.timepoints[2]', sol8_2.ensemblesol[2])
-
-plot!(sol9.timepoints[1]', sol9.ensemblesol[1])
-plot!(sol9.timepoints[2]', sol9.ensemblesol[2])
-plot!(sol9_2.timepoints[1]', sol9_2.ensemblesol[1])
-plot!(sol9_2.timepoints[2]', sol9_2.ensemblesol[2])
-
-plot!(sol5_5.timepoints[1]', sol5_5.ensemblesol[1])
-plot!(sol5_5.timepoints[2]', sol5_5.ensemblesol[2], legend = nothing)
-
-plot!(sol5_5_1.timepoints[1]', sol5_5_1.ensemblesol[1])
-plot!(sol5_5_1.timepoints[2]', sol5_5_1.ensemblesol[2], legend = nothing)
-plot!(sol7_1.timepoints[1]', sol7_1.ensemblesol[1])
-plot!(sol7_1.timepoints[2]', sol7_1.ensemblesol[2])
-
-plot!(sol7_4.timepoints[1]', sol7_4.ensemblesol[1])
-plot!(sol7_4.timepoints[2]', sol7_4.ensemblesol[2])
-
-plot!(sol5_2_1.timepoints[1]', sol5_2_1.ensemblesol[1], legend = nothing)
-plot!(sol5_2_1.timepoints[2]', sol5_2_1.ensemblesol[2])
-plot!(sol5_2_2.timepoints[1]', sol5_2_2.ensemblesol[1], legend = nothing)
-plot!(sol5_2_2.timepoints[2]', sol5_2_2.ensemblesol[2])
-
-plot!(sol5_0.timepoints[1]', sol5_0.ensemblesol[1])
-plot!(sol5_0.timepoints[2]', sol5_0.ensemblesol[2], legend = nothing)
-
-plot!(sol5_00.timepoints[1]', sol5_00.ensemblesol[1], legend = nothing)
-plot!(sol5_00.timepoints[2]', sol5_00.ensemblesol[2])
-
-plot!(sol3_0.timepoints[1]', sol3_0.ensemblesol[1])
-plot!(sol3_0.timepoints[2]', sol3_0.ensemblesol[2], legend = nothing)
-plot!(sol4_0.timepoints[1]', sol4_0.ensemblesol[1])
-plot!(sol4_0.timepoints[2]', sol4_0.ensemblesol[2], legend = nothing)
-
-plot!(sol3_100_05_gaussian.timepoints[1]', sol3_100_05_gaussian.ensemblesol[1])
-plot!(sol3_100_05_gaussian.timepoints[2]',
-    sol3_100_05_gaussian.ensemblesol[2], legend = nothing)
-
-plot!(sol3_100_05_gaussian_new.timepoints[1]', sol3_100_05_gaussian_new.ensemblesol[1])
-plot!(sol3_100_05_gaussian_new.timepoints[2]', sol3_100_05_gaussian_new.ensemblesol[2])
-
-plot!(sol3_100_08_gaussian.timepoints[1]', sol3_100_08_gaussian.ensemblesol[1])
-plot!(sol3_100_08_gaussian.timepoints[2]', sol3_100_08_gaussian.ensemblesol[2])
-
-plot!(sol3_100_08_gaussian_new.timepoints[1]', sol3_100_08_gaussian_new.ensemblesol[1])
-plot!(sol3_100_08_gaussian_new.timepoints[2]',
-    sol3_100_08_gaussian_new.ensemblesol[2], legend = nothing)
-
-plot!(sol3_100_uniform.timepoints[1]', sol3_100_uniform.ensemblesol[1])
-plot!(sol3_100_uniform.timepoints[2]', sol3_100_uniform.ensemblesol[2])
-
-plot!(sol3_100_08_gaussian_1000.timepoints[1]', sol3_100_08_gaussian_1000.ensemblesol[1])
-plot!(sol3_100_08_gaussian_1000.timepoints[2]', sol3_100_08_gaussian_1000.ensemblesol[2])
-
-plot!(sol3_100_05_gaussian_1000.timepoints[1]', sol3_100_05_gaussian_1000.ensemblesol[1])
-plot!(sol3_100_05_gaussian_1000.timepoints[2]', sol3_100_05_gaussian_1000.ensemblesol[2])
-
-plot!(sol3_100_uniform_1000.timepoints[1]', sol3_100_uniform_1000.ensemblesol[1])
-plot!(sol3_100_uniform_1000.timepoints[2]', sol3_100_uniform_1000.ensemblesol[2])
-
-plot!(sol3_100_08_gaussian_1000_bc.timepoints[1]',
-    sol3_100_08_gaussian_1000_bc.ensemblesol[1])
-plot!(sol3_100_08_gaussian_1000_bc.timepoints[2]',
-    sol3_100_08_gaussian_1000_bc.ensemblesol[2])
-
-# test with lower number of points
-# consider full range dataset case
-# combination of all above
-
-# run 1 100 iters
-sol5.estimated_de_params
-sol6.estimated_de_params
-
-# run 2 200 iters
-sol5.estimated_de_params
-sol6.estimated_de_params
-
-# run 2 200 iters
-sol3.estimated_de_params
-sol4.estimated_de_params
-
-# p = [2/3, 2/3, 1/3, 1/3]
-sol3.estimated_de_params
-sol4.estimated_de_params
-
-@parameters t, p
-@variables u(..)
-
-Dt = Differential(t)
-eqs = Dt(u(t)) - cos(p * t) ~ 0
-bcs = [u(0) ~ 0.0]
-domains = [t ∈ Interval(0.0, 2.0)]
-
-chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
-initl, st = Lux.setup(Random.default_rng(), chainl)
-
-@named pde_system = PDESystem(eqs,
-    bcs,
-    domains,
-    [t],
-    [u(t)],
-    [p],
-    defaults = Dict([p => 4.0]))
-
-analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
-timepoints = collect(0.0:(1 / 100.0):2.0)
-u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
-u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
-dataset = [hcat(u1, timepoints)]
-
-discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
-    dataset = [dataset, nothing])
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 1500,
-    bcstd = [0.05],
-    phystd = [0.01], l2std = [0.01],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 50.0],
-    param = [LogNormal(4.0, 2)], progress = true)
-
-param = 2 * π
-ts = vec(sol1.timepoints[1])
-u_real = [analytic_sol_func1(0.0, t) for t in ts]
-u_predict = pmean(sol1.ensemblesol[1])
-
-@test u_predict≈u_real atol=0.1
-@test mean(u_predict .- u_real) < 0.01
-@test sol1.estimated_de_params[1]≈param atol=0.1
-sol1.estimated_de_params[1]
-
-eqs = pde_system.eqs
-Dict_differentials = Dict()
-exps = toexpr.(eqs)
-nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-sol2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 1500,
-    bcstd = [0.05],
-    phystd = [0.01], l2std = [0.02], phystdnew = [0.02],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 50.0],
-    param = [LogNormal(4.0, 2)],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-param = 2 * π
-ts_2 = vec(sol2.timepoints[1])
-u_real_2 = [analytic_sol_func1(0.0, t) for t in ts]
-u_predict_2 = pmean(sol2.ensemblesol[1])
-
-@test u_predict_2≈u_real_2 atol=0.1
-@test mean(u_predict_2 .- u_real_2) < 0.01
-@test sol2.estimated_de_params[1]≈param atol=0.1
-sol2.estimated_de_params[1]
-
-plot(ts_2, u_predict_2)
-plot!(ts_2, u_real_2)
-
-@parameters t, σ_
-@variables x(..), y(..), z(..)
-Dt = Differential(t)
-eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
-    Dt(y(t)) ~ x(t) * (28.0 - z(t)) - y(t),
-    Dt(z(t)) ~ x(t) * y(t) - 8 / 3 * z(t)]
-
-bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
-domains = [t ∈ Interval(0.0, 1.0)]
-
-input_ = length(domains)
-n = 7
-chain = [
-    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-        Lux.Dense(n, 1)),
-    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-        Lux.Dense(n, 1)),
-    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-        Lux.Dense(n, 1))
-]
-
-#Generate Data
-function lorenz!(du, u, p, t)
-    du[1] = 10.0 * (u[2] - u[1])
-    du[2] = u[1] * (28.0 - u[3]) - u[2]
-    du[3] = u[1] * u[2] - (8 / 3) * u[3]
-end
-
-u0 = [1.0; 0.0; 0.0]
-tspan = (0.0, 1.0)
-prob = ODEProblem(lorenz!, u0, tspan)
-sol = solve(prob, Tsit5(), dt = 0.01, saveat = 0.05)
-ts = sol.t
-us = hcat(sol.u...)
-us = us .+ ((0.05 .* randn(size(us))) .* us)
-ts_ = hcat(sol(ts).t...)[1, :]
-dataset = [hcat(us[i, :], ts_) for i in 1:3]
-
-discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true,
-    dataset = [dataset, nothing])
-
-@named pde_system = PDESystem(eqs, bcs, domains,
-    [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]]))
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 100,
-    bcstd = [0.3, 0.3, 0.3],
-    phystd = [0.1, 0.1, 0.1],
-    l2std = [1, 1, 1],
-    priorsNNw = (0.0, 1.0),
-    saveats = [0.01],
-    param = [Normal(14.0, 2)], progress = true)
-
-idealp = 10.0
-p_ = sol1.estimated_de_params[1]
-@test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
-# @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
-
-@parameters x y
-@variables u(..)
-Dxx = Differential(x)^2
-Dyy = Differential(y)^2
-
-# 2D PDE
-eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y)
-
-# Boundary conditions
-bcs = [u(0, y) ~ 0.0, u(1, y) ~ 0.0,
-    u(x, 0) ~ 0.0, u(x, 1) ~ 0.0]
-
-# Space and time domains
-domains = [x ∈ Interval(0.0, 1.0),
-    y ∈ Interval(0.0, 1.0)]
-
-# Neural network
-dim = 2 # number of dimensions
-chain = Lux.Chain(Lux.Dense(dim, 9, Lux.σ), Lux.Dense(9, 9, Lux.σ), Lux.Dense(9, 1))
-
-# Discretization
-dx = 0.04
-discretization = BayesianPINN([chain], GridTraining(dx), dataset = [[dataset], nothing])
-
-@named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
-
-eqs = pde_system.eqs
-Dict_differentials = Dict()
-exps = toexpr.(eqs)
-nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 5,
-    bcstd = [0.01, 0.01, 0.01, 0.01],
-    phystd = [0.005],
-    priorsNNw = (0.0, 2.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-xs = sol1.timepoints[1]
-sol1.ensemblesol[1]
-analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2)
-
-dataset = hcat(u_real, xs')
-u_predict = pmean(sol1.ensemblesol[1])
-u_real = [analytic_sol_func(xs[:, i][1], xs[:, i][2]) for i in 1:length(xs[1, :])]
-@test u_predict≈u_real atol=0.8
-
-# KS EQUATION
-using NeuralPDE, Flux, Lux, ModelingToolkit, LinearAlgebra, AdvancedHMC
-import ModelingToolkit: Interval, infimum, supremum, Distributions
-using Plots, MonteCarloMeasurements, StatsPlots
-# plotly()
-
-@parameters x, t, α
-@variables u(..)
-Dt = Differential(t)
-Dx = Differential(x)
-Dx2 = Differential(x)^2
-Dx3 = Differential(x)^3
-Dx4 = Differential(x)^4
-
-# α = 1
-β = 4
-γ = 1
-eq = Dt(u(x, t)) + u(x, t) * Dx(u(x, t)) + α * Dx2(u(x, t)) + β * Dx3(u(x, t)) + γ * Dx4(u(x, t)) ~ 0
-
-u_analytic(x, t; z = -x / 2 + t) = 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
-du(x, t; z = -x / 2 + t) = 15 / 2 * (tanh(z) + 1) * (3 * tanh(z) - 1) * sech(z)^2
-
-bcs = [u(x, 0) ~ u_analytic(x, 0),
-    u(-10, t) ~ u_analytic(-10, t),
-    u(10, t) ~ u_analytic(10, t),
-    Dx(u(-10, t)) ~ du(-10, t),
-    Dx(u(10, t)) ~ du(10, t)]
-
-# Space and time domains
-domains = [x ∈ Interval(-10.0, 10.0),
-    t ∈ Interval(0.0, 1.0)]
-
-# Discretization
-dx = 0.4;
-dt = 0.2;
-
-# Function to compute analytical solution at a specific point (x, t)
-function u_analytic_point(x, t)
-    z = -x / 2 + t
-    return 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
-end
-
-# Function to generate the dataset matrix
-function generate_dataset_matrix(domains, dx, dt, xlim, tlim)
-    x_values = xlim[1]:dx:xlim[2]
-    t_values = tlim[1]:dt:tlim[2]
-
-    dataset = []
-
-    for t in t_values
-        for x in x_values
-            u_value = u_analytic_point(x, t)
-            push!(dataset, [u_value, x, t])
-        end
-    end
-
-    return vcat([data' for data in dataset]...)
-end
-
-datasetpde = [generate_dataset_matrix(domains, dx, dt, [-10, 10], [0.0, 1.0])]
-datasetpde_new = [generate_dataset_matrix(domains, dx, dt, [-10, 0], [0.0, 1.0])]
-
-# noise to dataset
-noisydataset = deepcopy(datasetpde)
-noisydataset[1][:, 1] = noisydataset[1][:, 1] .+ (randn(size(noisydataset[1][:, 1])) .* 0.8)
-
-noisydataset_new = deepcopy(datasetpde_new)
-noisydataset_new[1][:, 1] = noisydataset_new[1][:, 1] .+
-                            (randn(size(noisydataset_new[1][:, 1])) .* 0.8)
-
-plot(datasetpde[1][:, 2], datasetpde[1][:, 1], title = "Dataset from Analytical Solution")
-scatter!(noisydataset[1][:, 2], noisydataset[1][:, 1])
-
-plot(datasetpde[1][:, 2], datasetpde[1][:, 3], datasetpde[1][:, 1],
-    title = "Dataset from Analytical Solution")
-scatter!(noisydataset[1][:, 2], noisydataset[1][:, 3], noisydataset[1][:, 1])
-
-plot(datasetpde_new[1][:, 2], datasetpde_new[1][:, 1],
-    title = "Dataset from Analytical Solution")
-scatter!(noisydataset_new[1][:, 2], noisydataset_new[1][:, 1])
-
-plot(datasetpde_new[1][:, 2], datasetpde_new[1][:, 3],
-    datasetpde_new[1][:, 1], title = "Dataset from Analytical Solution")
-scatter!(noisydataset_new[1][:, 2], noisydataset_new[1][:, 3], noisydataset_new[1][:, 1])
-
-noise_std = 1.4
-original_data = datasetpde[1][:, 1]
-original_std = std(original_data)
-ratio = noise_std / original_std
-
-# Neural network
-chain = Lux.Chain(Lux.Dense(2, 8, Lux.tanh),
-    Lux.Dense(8, 8, Lux.tanh),
-    Lux.Dense(8, 1))
-
-discretization = NeuralPDE.BayesianPINN([chain],
-    GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset, nothing])
-
-discretization_new = NeuralPDE.BayesianPINN([chain],
-    GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset_new, nothing])
-
-@named pde_system = PDESystem(eq,
-    bcs,
-    domains,
-    [x, t],
-    [u(x, t)],
-    [α],
-    defaults = Dict([α => 2.0]))
-
-eqs = pde_system.eqs
-Dict_differentials = Dict()
-exps = toexpr.(eqs)
-nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 80, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.5],
-    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-sol1_1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 90, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.7],
-    phystd = [0.7], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-sol2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 80, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    progress = true)
-
-sol3 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 100, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-    phystd = [0.7], l2std = [0.15], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 3.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    progress = true)
-
-sol4 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 80, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-    phystd = [0.7], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    progress = true)
-
-phi = discretization.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol1.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-phi = discretization.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol1_1.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1_1.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-phi = discretization.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol2.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol2.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-phi = discretization.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol3.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol3.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-phi = discretization.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol4.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol4.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-sol0_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 110, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.2],
-    phystd = [0.2], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-julia > sol5_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 170, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    progress = true)
-
-sol1_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 110, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.2],
-    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-sol1_1_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 110, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.5],
-    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-sol1_2_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 150, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.1],
-    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 3.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-sol1_3_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 150, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.2],
-    phystd = [0.3], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 3.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-sol2_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 140, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-    phystd = [0.2], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    progress = true)
-
-sol3_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 140, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    progress = true)
-
-sol4_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 160, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    progress = true)
-
-sol5_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 170, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    progress = true)
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol0_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) -
-               first(pmean(phi([x, t], sol0_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol1_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) -
-               first(pmean(phi([x, t], sol1_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol1_1_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) -
-               first(pmean(phi([x, t], sol1_1_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol1_2_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) -
-               first(pmean(phi([x, t], sol1_2_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol1_3_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) -
-               first(pmean(phi([x, t], sol1_3_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol2_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) -
-               first(pmean(phi([x, t], sol2_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol3_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) -
-               first(pmean(phi([x, t], sol3_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol4_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) -
-               first(pmean(phi([x, t], sol4_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol5_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) -
-               first(pmean(phi([x, t], sol5_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol5_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) -
-               first(pmean(phi([x, t], sol5_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-p1 = plot(ts, xs, u_predict, title = "predict")
-p2 = plot(ts, xs, u_real, title = "analytic")
-p3 = plot(ts, xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-# MCMC chain analysis
-plot(sol1.original.mcmc_chain)
-plot(sol2.original.mcmc_chain)
-
-plot(sol0_new.original.mcmc_chain)
-plot(sol2_new.original.mcmc_chain)
-
-plot(sol1.original.mcmc_chain)
-meanplot(sol1.original.mcmc_chain)
-autocorplot(sol1.original.mcmc_chain)
-traceplot(sol1.original.mcmc_chain)
-
-plot(sol2.original.mcmc_chain)
-meanplot(sol2.original.mcmc_chain)
-autocorplot(sol2.original.mcmc_chain)
-traceplot(sol2.original.mcmc_chain)
-
-plot(sol0_new.original.mcmc_chain)
-meanplot(sol0_new.original.mcmc_chain)
-autocorplot(sol0_new.original.mcmc_chain)
-
-plot(sol2_new.original.mcmc_chain)
-meanplot(sol2_new.original.mcmc_chain)
-autocorplot(sol2_new.original.mcmc_chain)
-
-plot(sol3_new.original.mcmc_chain)
-meanplot(sol3_new.original.mcmc_chain)
-autocorplot(sol3_new.original.mcmc_chain)
\ No newline at end of file

From ac251447db2eff4e47e31be92f7f95c44330a4b5 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Wed, 2 Oct 2024 19:12:45 +0530
Subject: [PATCH 046/107] corrections in rebase

---
 src/BPINN_ode.jl        | 6 +-----
 src/PDE_BPINN.jl        | 2 +-
 src/advancedHMC_MCMC.jl | 1 -
 test/BPINN_Tests.jl     | 4 ++--
 4 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/src/BPINN_ode.jl b/src/BPINN_ode.jl
index 5b413f9520..9960006b18 100644
--- a/src/BPINN_ode.jl
+++ b/src/BPINN_ode.jl
@@ -102,8 +102,6 @@ struct BNNODE{C, K, IT <: NamedTuple,
     Integratorkwargs::IT
     numensemble::Int64
     estim_collocate::Bool
-    numensemble::Int64
-    estim_collocate::Bool
     autodiff::Bool
     progress::Bool
     verbose::Bool
@@ -116,8 +114,6 @@ function BNNODE(chain, Kernel = HMC; strategy = nothing, draw_samples = 2000,
             Metric = DiagEuclideanMetric,
             targetacceptancerate = 0.8),
         Integratorkwargs = (Integrator = Leapfrog,),
-    numensemble = floor(Int, draw_samples / 3),
-    estim_collocate = false,
         numensemble = floor(Int, draw_samples / 3),
         estim_collocate = false,
         autodiff = false, progress = false, verbose = false)
@@ -195,7 +191,7 @@ function SciMLBase.__solve(prob::SciMLBase.ODEProblem,
     @unpack chain, l2std, phystd, param, priorsNNw, Kernel, strategy,
     draw_samples, dataset, init_params,
     nchains, physdt, Adaptorkwargs, Integratorkwargs,
-    MCMCkwargs, numensemble, estim_collocate, numensemble, estim_collocate, autodiff, progress,
+    MCMCkwargs, numensemble, estim_collocate, autodiff, progress,
     verbose = alg
 
     # ahmc_bayesian_pinn_ode needs param=[] for easier vcat operation for full vector of parameters
diff --git a/src/PDE_BPINN.jl b/src/PDE_BPINN.jl
index 615f801bc5..c1cd182d98 100644
--- a/src/PDE_BPINN.jl
+++ b/src/PDE_BPINN.jl
@@ -471,7 +471,7 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
     # append Ode params to all paramvector - initial_θ
     if ninv > 0
         # shift ode params(initialise ode params by prior means)
-        # check if means or user speified is better
+        # check if means or user specified is better
         initial_θ = vcat(initial_θ, [Distributions.params(param[i])[1] for i in 1:ninv])
         priors = vcat(priors, param)
         nparameters += ninv
diff --git a/src/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl
index d72e13d229..7105346aa0 100644
--- a/src/advancedHMC_MCMC.jl
+++ b/src/advancedHMC_MCMC.jl
@@ -17,7 +17,6 @@ mutable struct LogTargetDensity{C, S, ST <: AbstractTrainingStrategy, I,
     extraparams::Int
     init_params::I
     estim_collocate::Bool
-    estim_collocate::Bool
 
     function LogTargetDensity(dim, prob, chain::Optimisers.Restructure, st, strategy,
             dataset,
diff --git a/test/BPINN_Tests.jl b/test/BPINN_Tests.jl
index 6534e88409..88e794df89 100644
--- a/test/BPINN_Tests.jl
+++ b/test/BPINN_Tests.jl
@@ -145,7 +145,7 @@ end
     dataset = [x̂, time]
     physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
 
-    # seperate set of points for testing the solve() call (it uses saveat 1/50 hence here length 501)
+    # separate set of points for testing the solve() call (it uses saveat 1/50 hence here length 501)
     time1 = vec(collect(Float64, range(tspan[1], tspan[2], length = 501)))
     physsol2 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
 
@@ -264,7 +264,7 @@ end
     dataset = [x̂, time]
     physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
 
-    # seperate set of points for testing the solve() call (it uses saveat 1/50 hence here length 501)
+    # separate set of points for testing the solve() call (it uses saveat 1/50 hence here length 501)
     time1 = vec(collect(Float64, range(tspan[1], tspan[2], length = 501)))
     physsol2 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
 

From 2ea5212e0c0c1f2bb49c10010b7f37ff6e77a68d Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Fri, 11 Oct 2024 16:14:10 +0530
Subject: [PATCH 047/107] tests pass locally

---
 test/BPINN_PDEinvsol_tests.jl | 248 +++++++++++++++++++++-------------
 1 file changed, 151 insertions(+), 97 deletions(-)

diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index b82502259b..07ce051b3a 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -34,33 +34,7 @@ Random.seed!(100)
     u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
     dataset = [hcat(u1, timepoints)]
 
-    # TODO: correct implementations
-    # # checking all training strategies
-    # discretization = BayesianPINN([chainl], StochasticTraining(200), param_estim = true,
-    #     dataset = [dataset, nothing])
-
-    # ahmc_bayesian_pinn_pde(pde_system,
-    #     discretization;
-    #     draw_samples = 1500,
-    #     bcstd = [0.05],
-    #     phystd = [0.01], l2std = [0.01],
-    #     priorsNNw = (0.0, 1.0),
-    #     saveats = [1 / 50.0],
-    #     param = [LogNormal(6.0, 0.5)])
-
-    # discretization = BayesianPINN([chainl], QuasiRandomTraining(200), param_estim = true,
-    #     dataset = [dataset, nothing])
-
-    # ahmc_bayesian_pinn_pde(pde_system,
-    #     discretization;
-    #     draw_samples = 1500,
-    #     bcstd = [0.05],
-    #     phystd = [0.01], l2std = [0.01],
-    #     priorsNNw = (0.0, 1.0),
-    #     saveats = [1 / 50.0],
-    #     param = [LogNormal(6.0, 0.5)])
-
-        # alternative to QuadratureTraining [WIP]
+    # TODO: correct BPINN implementations for Training Strategies.
 
     discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
         dataset = [dataset, nothing])
@@ -164,73 +138,153 @@ function recur_expression(exp, Dict_differentials)
     end
 end
 
-println("Example 3: 2D Periodic System with New parameter estimation")
-@parameters t, p
-@variables u(..)
-
-Dt = Differential(t)
-eqs = Dt(u(t)) - cos(p * t) * u(t) ~ 0
-bcs = [u(0) ~ 0.0]
-domains = [t ∈ Interval(0.0, 2.0)]
-
-chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
-initl, st = Lux.setup(Random.default_rng(), chainl)
-
-@named pde_system = PDESystem(eqs,
-    bcs,
-    domains,
-    [t],
-    [u(t)],
-    [p],
-    defaults = Dict([p => 4.0]))
-
-analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
-timepoints = collect(0.0:(1 / 100.0):2.0)
-u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
-u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
-dataset = [hcat(u1, timepoints)]
-
-discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
-    dataset = [dataset, nothing])
-
-# creating dictionary for masking equations
-eqs = pde_system.eqs
-Dict_differentials = Dict()
-exps = toexpr.(eqs)
-nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 1500,
-    bcstd = [0.05],
-    phystd = [0.01], l2std = [0.01], phystdnew = [0.05],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 50.0],
-    param = [LogNormal(6.0, 0.5)],
-    Dict_differentials = Dict_differentials)
-
-sol2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 1500,
-    bcstd = [0.05],
-    phystd = [0.01], l2std = [0.01],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 50.0],
-    param = [LogNormal(6.0, 0.5)])
-
-param = 2 * π
-ts = vec(sol1.timepoints[1])
-u_real = [analytic_sol_func1(0.0, t) for t in ts]
-u_predict = pmean(sol1.ensemblesol[1])
-
-@test u_predict≈u_real atol=1.5
-@test mean(u_predict .- u_real) < 0.1
-@test sol1.estimated_de_params[1]≈param atol=param * 0.3
-
-ts = vec(sol2.timepoints[1])
-u_real = [analytic_sol_func1(0.0, t) for t in ts]
-u_predict = pmean(sol2.ensemblesol[1])
-
-@test u_predict≈u_real atol=1.5
-@test mean(u_predict .- u_real) < 0.1
-@test sol1.estimated_de_params[1]≈param atol=param * 0.3
\ No newline at end of file
+@testset "improvement in Solving Parametric Kuromo-Sivashinsky Equation" begin
+    @parameters x, t, α
+    @variables u(..)
+    Dt = Differential(t)
+    Dx = Differential(x)
+    Dx2 = Differential(x)^2
+    Dx3 = Differential(x)^3
+    Dx4 = Differential(x)^4
+
+    # α = 1 (KS equation to be parametric in a)
+    β = 4
+    γ = 1
+    eq = Dt(u(x, t)) + u(x, t) * Dx(u(x, t)) + α * Dx2(u(x, t)) + β * Dx3(u(x, t)) + γ * Dx4(u(x, t)) ~ 0
+
+    u_analytic(x, t; z = -x / 2 + t) = 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
+    du(x, t; z = -x / 2 + t) = 15 / 2 * (tanh(z) + 1) * (3 * tanh(z) - 1) * sech(z)^2
+
+    bcs = [u(x, 0) ~ u_analytic(x, 0),
+        u(-10, t) ~ u_analytic(-10, t),
+        u(10, t) ~ u_analytic(10, t),
+        Dx(u(-10, t)) ~ du(-10, t),
+        Dx(u(10, t)) ~ du(10, t)]
+
+    # Space and time domains
+    domains = [x ∈ Interval(-10.0, 10.0),
+        t ∈ Interval(0.0, 1.0)]
+
+    # Discretization
+    dx = 0.4
+    dt = 0.2
+
+    # Function to compute analytical solution at a specific point (x, t)
+    function u_analytic_point(x, t)
+        z = -x / 2 + t
+        return 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
+    end
+
+    # Function to generate the dataset matrix
+    function generate_dataset_matrix(domains, dx, dt, xlim, tlim)
+        x_values = xlim[1]:dx:xlim[2]
+        t_values = tlim[1]:dt:tlim[2]
+
+        dataset = []
+
+        for t in t_values
+            for x in x_values
+                u_value = u_analytic_point(x, t)
+                push!(dataset, [u_value, x, t])
+            end
+        end
+
+        return vcat([data' for data in dataset]...)
+    end
+
+    # considering sparse dataset from half of x's domain
+    datasetpde_new = [generate_dataset_matrix(domains, dx, dt, [-10, 0], [0.0, 1.0])]
+
+    # Adding Gaussian noise with a 0.8 std
+    noisydataset_new = deepcopy(datasetpde_new)
+    noisydataset_new[1][:, 1] = noisydataset_new[1][:, 1] .+
+                                (randn(size(noisydataset_new[1][:, 1])) .* 0.8)
+
+    # Neural network
+    chain = Lux.Chain(Lux.Dense(2, 8, Lux.tanh),
+        Lux.Dense(8, 8, Lux.tanh),
+        Lux.Dense(8, 1))
+
+    # Discretization for old and new models
+    discretization = NeuralPDE.BayesianPINN([chain],
+        GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset_new, nothing])
+
+    # let α default to 2.0
+    @named pde_system = PDESystem(eq,
+        bcs,
+        domains,
+        [x, t],
+        [u(x, t)],
+        [α],
+        defaults = Dict([α => 2.0]))
+
+    # neccesarry for loss function contruction (involves Operator masking) 
+    eqs = pde_system.eqs
+    Dict_differentials = Dict()
+    exps = toexpr.(eqs)
+    nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+    # Dict_differentials is now ;
+    # Dict{Any, Any} with 5 entries:
+    #   Differential(x)(Differential(x)(u(x, t)))            => diff_5
+    #   Differential(x)(Differential(x)(Differential(x)(u(x… => diff_1
+    #   Differential(x)(Differential(x)(Differential(x)(Dif… => diff_2
+    #   Differential(x)(u(x, t))                             => diff_4
+    #   Differential(t)(u(x, t))                             => diff_3
+
+    # using HMC algorithm due to convergence, stability, time of training. (refer to mcmc chain plots)
+    # choice of std for objectives is very important
+    # pass in Dict_differentials, phystdnew arguments when using the new model
+
+    sol_new = ahmc_bayesian_pinn_pde(pde_system,
+        discretization;
+        draw_samples = 150,
+        bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.2],
+        phystd = [0.2], l2std = [0.5], param = [Distributions.Normal(2.0, 2)],
+        priorsNNw = (0.0, 1.0),
+        saveats = [1 / 100.0, 1 / 100.0],
+        Dict_differentials = Dict_differentials,
+        progress = true)
+
+    sol_old = ahmc_bayesian_pinn_pde(pde_system,
+        discretization;
+        draw_samples = 150,
+        bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+        phystd = [0.2], l2std = [0.5], param = [Distributions.Normal(2.0, 2)],
+        priorsNNw = (0.0, 1.0),
+        saveats = [1 / 100.0, 1 / 100.0],
+        progress = true)
+
+    phi = discretization.phi[1]
+    xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+              for (d, dx) in zip(domains, [dx / 10, dt])]
+    u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+
+    u_predict_new = [[first(pmean(phi([x, t], sol_new.estimated_nn_params[1]))) for x in xs]
+                     for t in ts]
+
+    diff_u_new = [[abs(u_analytic(x, t) -
+                       first(pmean(phi([x, t], sol_new.estimated_nn_params[1]))))
+                   for x in xs]
+                  for t in ts]
+
+    u_predict_old = [[first(pmean(phi([x, t], sol_old.estimated_nn_params[1]))) for x in xs]
+                     for t in ts]
+    diff_u_old = [[abs(u_analytic(x, t) -
+                       first(pmean(phi([x, t], sol_old.estimated_nn_params[1]))))
+                   for x in xs]
+                  for t in ts]
+
+    @test all(all, [((diff_u_new[i]) .^ 2 .< 0.5) for i in 1:6]) == true
+    @test all(all, [((diff_u_old[i]) .^ 2 .< 0.5) for i in 1:6]) == false
+
+    MSE_new = [sum(abs2, diff_u_new[i]) for i in 1:6]
+    MSE_old = [sum(abs2, diff_u_old[i]) for i in 1:6]
+    @test (MSE_new .< MSE_old) == [1, 1, 1, 1, 1, 1]
+
+    param_new = sol_new.estimated_de_params[1]
+    param_old = sol_old.estimated_de_params[1]
+    α = 1
+    @test abs(param_new - α) < 0.2 * α
+    @test abs(param_new - α) < abs(param_old - α)
+end
\ No newline at end of file

From 9fff415c6dc00a8c1a4bcfa5bfd00a982e8c98c8 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Fri, 11 Oct 2024 16:20:19 +0530
Subject: [PATCH 048/107] spell checks, Statistics.jl vers

---
 Project.toml                  | 2 +-
 test/BPINN_PDEinvsol_tests.jl | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Project.toml b/Project.toml
index 026a29ba72..32d95d792b 100644
--- a/Project.toml
+++ b/Project.toml
@@ -75,7 +75,7 @@ Reexport = "1.2"
 RuntimeGeneratedFunctions = "0.5.12"
 SafeTestsets = "0.1"
 SciMLBase = "2.28"
-Statistics = "1.10"
+Statistics = "1.11"
 SymbolicUtils = "1.5, 2, 3"
 Symbolics = "5.27.1, 6"
 Test = "1"
diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index 07ce051b3a..1d8dca0e1f 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -218,7 +218,7 @@ end
         [α],
         defaults = Dict([α => 2.0]))
 
-    # neccesarry for loss function contruction (involves Operator masking) 
+    # neccesarry for loss function construction (involves Operator masking) 
     eqs = pde_system.eqs
     Dict_differentials = Dict()
     exps = toexpr.(eqs)

From 247b8e3f7a7f2ca11314dab12cd3e303b22fcdff Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Sat, 12 Oct 2024 07:09:28 +0530
Subject: [PATCH 049/107] update tests

---
 src/PDE_BPINN.jl              |  2 +-
 test/BPINN_PDE_tests.jl       | 24 +++++++++++++-----------
 test/BPINN_PDEinvsol_tests.jl |  6 ++----
 3 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/src/PDE_BPINN.jl b/src/PDE_BPINN.jl
index c1cd182d98..b03f158942 100644
--- a/src/PDE_BPINN.jl
+++ b/src/PDE_BPINN.jl
@@ -416,7 +416,7 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
         end
     end
 
-    # [WIP] add overall functionality for BC dataset points
+    # [WIP] add overall functionality for BC dataset points (case of parametric BC)
     if ((dataset_bc isa Nothing) && (dataset_pde isa Nothing))
         dataset = nothing
     elseif dataset_bc isa Nothing
diff --git a/test/BPINN_PDE_tests.jl b/test/BPINN_PDE_tests.jl
index 0cefaba18f..35d62bd352 100644
--- a/test/BPINN_PDE_tests.jl
+++ b/test/BPINN_PDE_tests.jl
@@ -16,7 +16,7 @@ Random.seed!(100)
     eqs = Dt(u(t)) - cos(2 * π * t) ~ 0
     bcs = [u(0) ~ 0.0]
     domains = [t ∈ Interval(0.0, 2.0)]
-    chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
+    chainl = Lux.Chain(Lux.Dense(1, 5, tanh), Lux.Dense(5, 5, tanh), Lux.Dense(5, 1))
     initl, st = Lux.setup(Random.default_rng(), chainl)
     @named pde_system = PDESystem(eqs, bcs, domains, [t], [u(t)])
 
@@ -25,10 +25,10 @@ Random.seed!(100)
 
     sol1 = ahmc_bayesian_pinn_pde(pde_system,
         discretization;
-        draw_samples = 2000,
-        bcstd = [0.02],
+        draw_samples = 250,
+        bcstd = [0.001],
         phystd = [0.01],
-        priorsNNw = (0.0, 10.0),
+        priorsNNw = (0.0, 1.0),
         saveats = [1 / 50.0])
 
     analytic_sol_func(u0, t) = u0 + sin(2 * π * t) / (2 * π)
@@ -36,8 +36,8 @@ Random.seed!(100)
     u_real = [analytic_sol_func(0.0, t) for t in ts]
     u_predict = pmean(sol1.ensemblesol[1])
 
-    @test u_predict≈u_real atol=0.05
-    @test mean(u_predict .- u_real) < 1e-3
+    @test u_predict≈u_real atol=0.02
+    @test mean(abs.(u_predict .- u_real)) < 1e-3
 end
 
 @testset "Example 2: 1D ODE" begin
@@ -159,10 +159,10 @@ end
 
     sol1 = ahmc_bayesian_pinn_pde(pde_system,
         discretization;
-        draw_samples = 200,
-        bcstd = [0.01, 0.01, 0.01, 0.01],
-        phystd = [0.005],
-        priorsNNw = (0.0, 2.0),
+        draw_samples = 400,
+        bcstd = [0.05, 0.05, 0.05, 0.05],
+        phystd = [0.05],
+        priorsNNw = (0.0, 1.0),
         saveats = [1 / 100.0, 1 / 100.0])
 
     xs = sol1.timepoints[1]
@@ -170,7 +170,9 @@ end
 
     u_predict = pmean(sol1.ensemblesol[1])
     u_real = [analytic_sol_func(xs[:, i][1], xs[:, i][2]) for i in 1:length(xs[1, :])]
-    @test u_predict≈u_real atol=0.8
+
+    @test sum(abs2.(u_predict .- u_real)) < 0.1
+    @test u_predict≈u_real atol=0.1
 end
 
 @testset "Translating from Flux" begin
diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index 1d8dca0e1f..5cc53df354 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -243,8 +243,7 @@ end
         phystd = [0.2], l2std = [0.5], param = [Distributions.Normal(2.0, 2)],
         priorsNNw = (0.0, 1.0),
         saveats = [1 / 100.0, 1 / 100.0],
-        Dict_differentials = Dict_differentials,
-        progress = true)
+        Dict_differentials = Dict_differentials)
 
     sol_old = ahmc_bayesian_pinn_pde(pde_system,
         discretization;
@@ -252,8 +251,7 @@ end
         bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
         phystd = [0.2], l2std = [0.5], param = [Distributions.Normal(2.0, 2)],
         priorsNNw = (0.0, 1.0),
-        saveats = [1 / 100.0, 1 / 100.0],
-        progress = true)
+        saveats = [1 / 100.0, 1 / 100.0])
 
     phi = discretization.phi[1]
     xs, ts = [infimum(d.domain):dx:supremum(d.domain)

From 2eba3e3c263d299c87604ccd92bf660ea2d8f21e Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 14 Oct 2024 15:54:33 +0000
Subject: [PATCH 050/107] build(deps): bump crate-ci/typos from 1.25.0 to
 1.26.0

Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.25.0 to 1.26.0.
- [Release notes](https://github.com/crate-ci/typos/releases)
- [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md)
- [Commits](https://github.com/crate-ci/typos/compare/v1.25.0...v1.26.0)

---
updated-dependencies:
- dependency-name: crate-ci/typos
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 .github/workflows/SpellCheck.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/SpellCheck.yml b/.github/workflows/SpellCheck.yml
index 33e72aff0c..b9290eac20 100644
--- a/.github/workflows/SpellCheck.yml
+++ b/.github/workflows/SpellCheck.yml
@@ -10,4 +10,4 @@ jobs:
       - name: Checkout Actions Repository
         uses: actions/checkout@v4
       - name: Check spelling
-        uses: crate-ci/typos@v1.25.0 
\ No newline at end of file
+        uses: crate-ci/typos@v1.26.0 
\ No newline at end of file

From f81aa7a10dfd9e4ed0be0721a4d1f399933904d5 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Tue, 15 Oct 2024 17:49:28 +0530
Subject: [PATCH 051/107] low level changes, transform fixes

---
 src/BPINN_ode.jl        | 13 +++---
 src/PDE_BPINN.jl        |  8 ++--
 src/advancedHMC_MCMC.jl | 34 +++++++++------
 src/pinn_types.jl       |  2 +-
 test/BPINN_PDE_tests.jl | 11 ++---
 test/BPINN_Tests.jl     | 97 ++++++++---------------------------------
 6 files changed, 55 insertions(+), 110 deletions(-)

diff --git a/src/BPINN_ode.jl b/src/BPINN_ode.jl
index 9960006b18..39bb0aac72 100644
--- a/src/BPINN_ode.jl
+++ b/src/BPINN_ode.jl
@@ -93,6 +93,7 @@ struct BNNODE{C, K, IT <: NamedTuple,
     param::P
     l2std::Vector{Float64}
     phystd::Vector{Float64}
+    phynewstd::Vector{Float64}
     dataset::D
     physdt::Float64
     MCMCkwargs::H
@@ -107,7 +108,7 @@ struct BNNODE{C, K, IT <: NamedTuple,
     verbose::Bool
 end
 function BNNODE(chain, Kernel = HMC; strategy = nothing, draw_samples = 2000,
-        priorsNNw = (0.0, 2.0), param = nothing, l2std = [0.05], phystd = [0.05],
+        priorsNNw = (0.0, 2.0), param = nothing, l2std = [0.05], phystd = [0.05], phynewstd = [0.05],
         dataset = [nothing], physdt = 1 / 20.0, MCMCkwargs = (n_leapfrog = 30,), nchains = 1,
         init_params = nothing,
         Adaptorkwargs = (Adaptor = StanHMCAdaptor,
@@ -121,7 +122,7 @@ function BNNODE(chain, Kernel = HMC; strategy = nothing, draw_samples = 2000,
         (chain = adapt(FromFluxAdaptor(false, false), chain))
     BNNODE(chain, Kernel, strategy,
         draw_samples, priorsNNw, param, l2std,
-        phystd, dataset, physdt, MCMCkwargs,
+        phystd, phynewstd, dataset, physdt, MCMCkwargs,
         nchains, init_params,
         Adaptorkwargs, Integratorkwargs,
         numensemble, estim_collocate,
@@ -186,9 +187,8 @@ function SciMLBase.__solve(prob::SciMLBase.ODEProblem,
         reltol = 1.0f-3,
         verbose = false,
         saveat = 1 / 50.0,
-        maxiters = nothing,
-        numensemble = floor(Int, alg.draw_samples / 3))
-    @unpack chain, l2std, phystd, param, priorsNNw, Kernel, strategy,
+        maxiters = nothing,)
+    @unpack chain, l2std, phystd, phynewstd, param, priorsNNw, Kernel, strategy,
     draw_samples, dataset, init_params,
     nchains, physdt, Adaptorkwargs, Integratorkwargs,
     MCMCkwargs, numensemble, estim_collocate, autodiff, progress,
@@ -206,7 +206,8 @@ function SciMLBase.__solve(prob::SciMLBase.ODEProblem,
         strategy = strategy, dataset = dataset,
         draw_samples = draw_samples,
         init_params = init_params,
-        physdt = physdt, l2std = l2std,
+        physdt = physdt, phynewstd = phynewstd,
+        l2std = l2std,
         phystd = phystd,
         priorsNNw = priorsNNw,
         param = param,
diff --git a/src/PDE_BPINN.jl b/src/PDE_BPINN.jl
index b03f158942..044080118e 100644
--- a/src/PDE_BPINN.jl
+++ b/src/PDE_BPINN.jl
@@ -562,14 +562,14 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
         mcmc_chain = MCMCChains.Chains(matrix_samples')
 
         @info("Sampling Complete.")
-        @info("Current Physics Log-likelihood : ",
+        @info("Final Physics Log-likelihood : ",
             ℓπ.full_loglikelihood(setparameters(ℓπ, samples[end]),
                 ℓπ.allstd))
-        @info("Current Prior Log-likelihood : ", priorlogpdf(ℓπ, samples[end]))
-        @info("Current MSE against dataset Log-likelihood : ",
+        @info("Final Prior Log-likelihood : ", priorlogpdf(ℓπ, samples[end]))
+        @info("Final MSE against dataset Log-likelihood : ",
             L2LossData(ℓπ, samples[end]))
         if !(newloss isa Nothing)
-            @info("Current L2_LOSSY : ",
+            @info("Final L2_LOSSY : ",
                 ℓπ.L2_loss2(setparameters(ℓπ, samples[end]),
                     ℓπ.allstd))
         end
diff --git a/src/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl
index 7105346aa0..8b996fce5c 100644
--- a/src/advancedHMC_MCMC.jl
+++ b/src/advancedHMC_MCMC.jl
@@ -11,6 +11,7 @@ mutable struct LogTargetDensity{C, S, ST <: AbstractTrainingStrategy, I,
     dataset::D
     priors::P
     phystd::Vector{Float64}
+    phynewstd::Vector{Float64}
     l2std::Vector{Float64}
     autodiff::Bool
     physdt::Float64
@@ -20,7 +21,7 @@ mutable struct LogTargetDensity{C, S, ST <: AbstractTrainingStrategy, I,
 
     function LogTargetDensity(dim, prob, chain::Optimisers.Restructure, st, strategy,
             dataset,
-            priors, phystd, l2std, autodiff, physdt, extraparams,
+            priors, phystd, phynewstd, l2std, autodiff, physdt, extraparams,
             init_params::AbstractVector, estim_collocate)
         new{
             typeof(chain),
@@ -36,6 +37,7 @@ mutable struct LogTargetDensity{C, S, ST <: AbstractTrainingStrategy, I,
             dataset,
             priors,
             phystd,
+            phynewstd,
             l2std,
             autodiff,
             physdt,
@@ -45,7 +47,7 @@ mutable struct LogTargetDensity{C, S, ST <: AbstractTrainingStrategy, I,
     end
     function LogTargetDensity(dim, prob, chain::Lux.AbstractExplicitLayer, st, strategy,
             dataset,
-            priors, phystd, l2std, autodiff, physdt, extraparams,
+            priors, phystd, phynewstd, l2std, autodiff, physdt, extraparams,
             init_params::NamedTuple, estim_collocate)
         new{
             typeof(chain),
@@ -58,7 +60,8 @@ mutable struct LogTargetDensity{C, S, ST <: AbstractTrainingStrategy, I,
             prob,
             chain, st, strategy,
             dataset, priors,
-            phystd, l2std,
+            phystd, phynewstd,
+             l2std,
             autodiff,
             physdt,
             extraparams,
@@ -136,10 +139,10 @@ function L2loss2(Tar::LogTargetDensity, θ)
    
         physlogprob = 0
         for i in 1:length(Tar.prob.u0)
-            # can add phystd[i] for u[i] 
+            # can add phystdnew[i] for u[i] 
             physlogprob += logpdf(MvNormal(deri_physsol[i, :],
                     LinearAlgebra.Diagonal(map(abs2,
-                        (Tar.l2std[i] * 4.0) .*
+                        (Tar.phynewstd[i]) .*
                         ones(length(nnsol[i, :]))))),
                 nnsol[i, :])
         end
@@ -162,7 +165,7 @@ function L2LossData(Tar::LogTargetDensity, θ)
 
         L2logprob = 0
         for i in 1:length(Tar.prob.u0)
-            # for u[i] ith vector must be added to dataset,nn[1,:] is the dx in lotka_volterra
+            # for u[i] ith vector must be added to dataset, nn[1,:] is the dx in lotka_volterra
             L2logprob += logpdf(
                 MvNormal(nn[i, :],
                     LinearAlgebra.Diagonal(abs2.(Tar.l2std[i] .*
@@ -395,7 +398,7 @@ end
     ahmc_bayesian_pinn_ode(prob, chain; strategy = GridTraining,
                         dataset = [nothing],init_params = nothing,
                         draw_samples = 1000, physdt = 1 / 20.0f0,l2std = [0.05],
-                        phystd = [0.05], priorsNNw = (0.0, 2.0),
+                        phystd = [0.05], phynewstd = [0.05], priorsNNw = (0.0, 2.0),
                         param = [], nchains = 1, autodiff = false, Kernel = HMC,
                         Adaptorkwargs = (Adaptor = StanHMCAdaptor,
                                          Metric = DiagEuclideanMetric,
@@ -466,6 +469,7 @@ Incase you are only solving the Equations for solution, do not provide dataset
 * `draw_samples`: number of samples to be drawn in the MCMC algorithms (warmup samples are ~2/3 of draw samples)
 * `l2std`: standard deviation of BPINN prediction against L2 losses/Dataset
 * `phystd`: standard deviation of BPINN prediction against Chosen Underlying ODE System
+* `phynewstd`: standard deviation of new loss func term
 * `priorsNNw`: Tuple of (mean, std) for BPINN Network parameters. Weights and Biases of BPINN are Normal Distributions by default.
 * `param`: Vector of chosen ODE parameters Distributions in case of Inverse problems.
 * `autodiff`: Boolean Value for choice of Derivative Backend(default is numerical)
@@ -492,7 +496,7 @@ function ahmc_bayesian_pinn_ode(prob::SciMLBase.ODEProblem, chain;
         strategy = GridTraining, dataset = [nothing],
         init_params = nothing, draw_samples = 1000,
         physdt = 1 / 20.0, l2std = [0.05],
-        phystd = [0.05], priorsNNw = (0.0, 2.0),
+        phystd = [0.05], phynewstd = [0.05], priorsNNw = (0.0, 2.0),
         param = [], nchains = 1, autodiff = false,
         Kernel = HMC,
         Adaptorkwargs = (Adaptor = StanHMCAdaptor,
@@ -558,7 +562,7 @@ function ahmc_bayesian_pinn_ode(prob::SciMLBase.ODEProblem, chain;
     t0 = prob.tspan[1]
     # dimensions would be total no of params,initial_nnθ for Lux namedTuples
     ℓπ = LogTargetDensity(nparameters, prob, recon, st, strategy, dataset, priors,
-        phystd, l2std, autodiff, physdt, ninv, initial_nnθ, estim_collocate)
+        phystd, phynewstd, l2std, autodiff, physdt, ninv, initial_nnθ, estim_collocate)
 
     try
         ℓπ(t0, initial_θ[1:(nparameters - ninv)])
@@ -574,7 +578,8 @@ function ahmc_bayesian_pinn_ode(prob::SciMLBase.ODEProblem, chain;
     @info("Current Prior Log-likelihood : ", priorweights(ℓπ, initial_θ))
     @info("Current MSE against dataset Log-likelihood : ", L2LossData(ℓπ, initial_θ))
     if estim_collocate
-        @info("Current gradient loss against dataset Log-likelihood : ", L2loss2(ℓπ, initial_θ))
+        @info("Current gradient loss against dataset Log-likelihood : ",
+            L2loss2(ℓπ, initial_θ))
     end
 
     Adaptor, Metric, targetacceptancerate = Adaptorkwargs[:Adaptor],
@@ -624,11 +629,12 @@ function ahmc_bayesian_pinn_ode(prob::SciMLBase.ODEProblem, chain;
             adaptor; progress = progress, verbose = verbose)
 
         @info("Sampling Complete.")
-        @info("Current Physics Log-likelihood : ", physloglikelihood(ℓπ, samples[end]))
-        @info("Current Prior Log-likelihood : ", priorweights(ℓπ, samples[end]))
-        @info("Current MSE against dataset Log-likelihood : ", L2LossData(ℓπ, samples[end]))
+        @info("Final Physics Log-likelihood : ", physloglikelihood(ℓπ, samples[end]))
+        @info("Final Prior Log-likelihood : ", priorweights(ℓπ, samples[end]))
+        @info("Final MSE against dataset Log-likelihood : ", L2LossData(ℓπ, samples[end]))
         if estim_collocate
-            @info("Current gradient loss against dataset Log-likelihood : ", L2loss2(ℓπ, samples[end]))
+            @info("Final gradient loss against dataset Log-likelihood : ",
+                L2loss2(ℓπ, samples[end]))
         end
 
         # return a chain(basic chain),samples and stats
diff --git a/src/pinn_types.jl b/src/pinn_types.jl
index 59480d8a60..6944041efd 100644
--- a/src/pinn_types.jl
+++ b/src/pinn_types.jl
@@ -247,7 +247,7 @@ struct BayesianPINN{T, P, PH, DER, PE, AL, ADA, LOG, D, K} <: AbstractPINN
         multioutput = chain isa AbstractArray
         if multioutput
             !all(i -> i isa Lux.AbstractExplicitLayer, chain) &&
-                (chain = Lux.transform.(chain))
+                (chain = [adapt(FromFluxAdaptor(false, false), chain_i) for chain_i in chain])
         else
             !(chain isa Lux.AbstractExplicitLayer) &&
                 (chain = adapt(FromFluxAdaptor(false, false), chain))
diff --git a/test/BPINN_PDE_tests.jl b/test/BPINN_PDE_tests.jl
index 35d62bd352..2936911fe0 100644
--- a/test/BPINN_PDE_tests.jl
+++ b/test/BPINN_PDE_tests.jl
@@ -159,10 +159,10 @@ end
 
     sol1 = ahmc_bayesian_pinn_pde(pde_system,
         discretization;
-        draw_samples = 400,
-        bcstd = [0.05, 0.05, 0.05, 0.05],
-        phystd = [0.05],
-        priorsNNw = (0.0, 1.0),
+        draw_samples = 200,
+        bcstd = [0.0025, 0.0025, 0.0025, 0.0025],
+        phystd = [0.005],
+        priorsNNw = (0.0, 0.5),
         saveats = [1 / 100.0, 1 / 100.0])
 
     xs = sol1.timepoints[1]
@@ -171,8 +171,9 @@ end
     u_predict = pmean(sol1.ensemblesol[1])
     u_real = [analytic_sol_func(xs[:, i][1], xs[:, i][2]) for i in 1:length(xs[1, :])]
 
+    @test mean(abs2.(u_predict .- u_real)) < 5e-3
+    @test all(abs.(u_predict .- u_real) .< 15e-3)
     @test sum(abs2.(u_predict .- u_real)) < 0.1
-    @test u_predict≈u_real atol=0.1
 end
 
 @testset "Translating from Flux" begin
diff --git a/test/BPINN_Tests.jl b/test/BPINN_Tests.jl
index 88e794df89..1f5672d3f4 100644
--- a/test/BPINN_Tests.jl
+++ b/test/BPINN_Tests.jl
@@ -276,7 +276,7 @@ end
         dataset = dataset,
         draw_samples = 1000,
         l2std = [0.1],
-        phystd = [0.03],
+        phystd = [0.01],
         priorsNNw = (0.0,
             1.0),
         param = [
@@ -288,7 +288,8 @@ end
         dataset = dataset,
         draw_samples = 1000,
         l2std = [0.1],
-        phystd = [0.03],
+        phystd = [0.01],
+        phynewstd = [0.01],
         priorsNNw = (0.0,
             1.0),
         param = [
@@ -299,114 +300,50 @@ end
         dataset = dataset,
         draw_samples = 1000,
         l2std = [0.1],
-        phystd = [0.03],
+        phystd = [0.01],
+        phynewstd = [0.05],
         priorsNNw = (0.0,
             1.0),
         param = [
             Normal(-7, 3)
-        ], estim_collocate = true)
+        ], numensemble = 200,
+        estim_collocate = true)
 
     sol3lux_pestim = solve(prob, alg)
 
     # testing timepoints
     t = sol.t
     #------------------------------ ahmc_bayesian_pinn_ode() call
-    # Mean of last 500 sampled parameter's curves(lux chains)[Ensemble predictions]
+    # Mean of last 200 sampled parameter's curves(lux chains)[Ensemble predictions]
     θ = [vector_to_parameters(fhsampleslux12[i][1:(end - 1)], θinit)
-         for i in 750:length(fhsampleslux12)]
+         for i in 800:length(fhsampleslux12)]
     luxar = [chainlux12(t', θ[i], st)[1] for i in eachindex(θ)]
     luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
     meanscurve2_1 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
 
     θ = [vector_to_parameters(fhsampleslux22[i][1:(end - 1)], θinit)
-         for i in 750:length(fhsampleslux22)]
+         for i in 800:length(fhsampleslux22)]
     luxar = [chainlux12(t', θ[i], st)[1] for i in eachindex(θ)]
     luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
     meanscurve2_2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
 
-    @test mean(abs.(sol.u .- meanscurve2_2)) < 6e-2
-    @test mean(abs.(physsol1 .- meanscurve2_2)) < 6e-2
+    @test mean(abs.(sol.u .- meanscurve2_2)) < 5e-2
+    @test mean(abs.(physsol1 .- meanscurve2_2)) < 5e-2
     @test mean(abs.(sol.u .- meanscurve2_1)) > mean(abs.(sol.u .- meanscurve2_2))
     @test mean(abs.(physsol1 .- meanscurve2_1)) > mean(abs.(physsol1 .- meanscurve2_2))
 
     # estimated parameters(lux chain)
-    param2 = mean(i[62] for i in fhsampleslux22[750:length(fhsampleslux22)])
-    @test abs(param2 - p) < abs(0.25 * p)
+    param2 = mean(i[62] for i in fhsampleslux22[800:length(fhsampleslux22)])
+    @test abs(param2 - p) < abs(0.2 * p)
 
-    param1 = mean(i[62] for i in fhsampleslux12[750:length(fhsampleslux12)])
-    @test abs(param1 - p) < abs(0.75 * p)
+    param1 = mean(i[62] for i in fhsampleslux12[800:length(fhsampleslux12)])
+    @test !(abs(param1 - p) < abs(0.2 * p))
     @test abs(param2 - p) < abs(param1 - p)
 
     #-------------------------- solve() call 
     # (lux chain)
-    @test mean(abs.(physsol2 .- pmean(sol3lux_pestim.ensemblesol[1]))) < 0.1
+    @test mean(abs.(physsol2 .- pmean(sol3lux_pestim.ensemblesol[1]))) < 5e-2
     # estimated parameters(lux chain)
     param3 = sol3lux_pestim.estimated_de_params[1]
     @test abs(param3 - p) < abs(0.2 * p)
-end
-
-@testset "Example 4 - improvement" begin
-    function lotka_volterra(u, p, t)
-        # Model parameters.
-        α, β, γ, δ = p
-        # Current state.
-        x, y = u
-
-        # Evaluate differential equations.
-        dx = (α - β * y) * x # prey
-        dy = (δ * x - γ) * y # predator
-
-        return [dx, dy]
-    end
-
-    # initial-value problem.
-    u0 = [1.0, 1.0]
-    p = [1.5, 1.0, 3.0, 1.0]
-    tspan = (0.0, 4.0)
-    prob = ODEProblem(lotka_volterra, u0, tspan, p)
-
-    # Solve using OrdinaryDiffEq.jl solver
-    dt = 0.2
-    solution = solve(prob, Tsit5(); saveat = dt)
-
-    times = solution.t
-    u = hcat(solution.u...)
-    x = u[1, :] + (0.8 .* randn(length(u[1, :])))
-    y = u[2, :] + (0.8 .* randn(length(u[2, :])))
-    dataset = [x, y, times]
-
-    chain = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),
-        Lux.Dense(6, 2))
-
-    alg1 = BNNODE(chain;
-        dataset = dataset,
-        draw_samples = 1000,
-        l2std = [0.2, 0.2],
-        phystd = [0.1, 0.1],
-        priorsNNw = (0.0, 1.0),
-        param = [
-            Normal(2, 0.5),
-            Normal(2, 0.5),
-            Normal(2, 0.5),
-            Normal(2, 0.5)])
-
-    alg2 = BNNODE(chain;
-        dataset = dataset,
-        draw_samples = 1000,
-        l2std = [0.2, 0.2],
-        phystd = [0.1, 0.1],
-        priorsNNw = (0.0, 1.0),
-        param = [
-            Normal(2, 0.5),
-            Normal(2, 0.5),
-            Normal(2, 0.5),
-            Normal(2, 0.5)], estim_collocate = true)
-
-    @time sol_pestim1 = solve(prob, alg1; saveat = dt)
-    @time sol_pestim2 = solve(prob, alg2; saveat = dt)
-
-    unsafe_comparisons(true)
-    bitvec = abs.(p .- sol_pestim1.estimated_de_params) .>
-             abs.(p .- sol_pestim2.estimated_de_params)
-    @test bitvec == ones(size(bitvec))
 end
\ No newline at end of file

From 4506f6aa077a069580d23a5ef04e5e484040f871 Mon Sep 17 00:00:00 2001
From: Avik Pal <avik.pal.2017@gmail.com>
Date: Wed, 16 Oct 2024 20:30:36 -0400
Subject: [PATCH 052/107] refactor: trigger build with latest Lux (#882)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* chore: update to latest Lux

* test: run tests enabling type-instability checks

* chore: bump minimum versions

* adapt fix

* undo NNPDE_tests.jl

* Revert "adapt fix"

This reverts commit 71c7071e76b0d48d247cc986fc4c2f27c4024c2f.

* Array{eltypeθ}

* Revert "Array{eltypeθ}"

This reverts commit 6d6d670777802abd35ae77d03413a81787560bb4.

* convert

* add convert topost  adapt

* fix ε_

* fix

* fix convert

* update convert, adapt

* update project.toml

* update Project

* update Project

* update Project

* update

* fix

* typos

* fix: dependencies

* test: merge qa testing

* chore: run formatter

* fix: BPINN ODE testing

* fix: update minimum versions

* refactor: update DGM implementation

* test: mark weighted training test as broken

* refactor: remove junk boilerplate

* fix: element type handling

* fix: incorrect DGM architecture

* refactor: rearrange exports

* test: run logging with non-error depwarn

* fix: forward tests

* fix: downgrade testing

* refactor: cleanup NNODE

* refactor: use explicit imports

* refactor: cleanup NNDAE

* feat: bring back NNRODE

* refactor: cleanup PINN code

* fix: eltype conversions in IntegroDiff

* refactor: cleanup neural adapter code

* refactor: bayesian PINN ODEs

* fix: missing NNRODE tests

* fix: try fixing more tests

* fix: different device handling

* fix: Bayesian PINN

* test: try reducing maxiters

* refactor: more cleanup of neural adapter

* docs: update compat

* docs: cleanup

* refactor: cleanup of deps a bit

* fix: allow scalar for number types

* fix: neural adapter tests

* fix: final round of cleanup

* fix: remove incorrect NNRODE implementation

* refactor: remove NeuralPDELogging in-favor of extension (#901)

---------

Co-authored-by: KirillZubov <kirillzubov3@gmail.com>
---
 .JuliaFormatter.toml                          |   3 +-
 .buildkite/pipeline.yml                       |   6 +-
 .github/workflows/CompatHelper.yml            |   2 +-
 .github/workflows/Downgrade.yml               |   4 +-
 .github/workflows/Tests.yml                   |   5 +-
 Project.toml                                  |  89 ++-
 docs/Project.toml                             |  16 +-
 docs/src/examples/3rd.md                      |   6 +-
 docs/src/examples/complex.md                  |  13 +-
 docs/src/examples/heterogeneous.md            |  10 +-
 docs/src/examples/ks.md                       |   7 +-
 docs/src/examples/linear_parabolic.md         |   9 +-
 docs/src/examples/nonlinear_elliptic.md       |  11 +-
 docs/src/examples/nonlinear_hyperbolic.md     |   4 +-
 docs/src/examples/wave.md                     |  12 +-
 docs/src/tutorials/Lotka_Volterra_BPINNs.md   |   3 +-
 docs/src/tutorials/dae.md                     |   5 +-
 .../tutorials/derivative_neural_network.md    |  13 +-
 docs/src/tutorials/dgm.md                     |  12 +-
 docs/src/tutorials/gpu.md                     |  18 +-
 docs/src/tutorials/low_level.md               |  14 +-
 docs/src/tutorials/low_level_2.md             |   6 +-
 docs/src/tutorials/neural_adapter.md          |   4 +-
 ext/NeuralPDETensorBoardLoggerExt.jl          |  19 +
 lib/NeuralPDELogging/LICENSE                  |   9 -
 lib/NeuralPDELogging/Project.toml             |  27 -
 lib/NeuralPDELogging/src/NeuralPDELogging.jl  |  24 -
 .../test/adaptive_loss_log_tests.jl           | 135 ----
 lib/NeuralPDELogging/test/runtests.jl         |  45 --
 src/BPINN_ode.jl                              | 210 +++---
 src/NeuralPDE.jl                              | 130 ++--
 src/PDE_BPINN.jl                              | 310 ++++-----
 src/adaptive_losses.jl                        | 210 +++---
 src/advancedHMC_MCMC.jl                       | 601 +++++++-----------
 src/dae_solve.jl                              | 166 ++---
 src/dgm.jl                                    | 140 ++--
 src/discretize.jl                             | 345 ++++------
 src/eltype_matching.jl                        |  14 +
 src/neural_adapter.jl                         | 124 +---
 src/ode_solve.jl                              | 399 +++++-------
 src/pinn_types.jl                             | 393 ++++--------
 src/rode_solve.jl                             | 116 ----
 src/symbolic_utilities.jl                     |  26 +-
 src/training_strategies.jl                    | 277 ++++----
 src/transform_inf_integral.jl                 |   6 +-
 test/BPINN_PDE_tests.jl                       |  82 +--
 test/BPINN_PDEinvsol_tests.jl                 |  10 +-
 test/BPINN_Tests.jl                           | 202 ++----
 test/IDE_tests.jl                             |  35 +-
 test/NNDAE_tests.jl                           |  27 +-
 test/NNODE_tests.jl                           | 160 ++---
 test/NNODE_tstops_test.jl                     |  77 +--
 test/NNPDE_tests.jl                           | 115 ++--
 test/NNPDE_tests_gpu_Lux.jl                   |  47 +-
 test/NNRODE_tests.jl                          |  40 --
 test/adaptive_loss_tests.jl                   |  66 +-
 test/additional_loss_tests.jl                 |  49 +-
 test/dgm_test.jl                              |  37 +-
 test/direct_function_tests.jl                 |  26 +-
 test/forward_tests.jl                         |  33 +-
 test/logging_tests.jl                         | 102 +++
 test/neural_adapter_tests.jl                  | 190 +++---
 test/qa.jl                                    |  17 +-
 test/runtests.jl                              |  97 +--
 64 files changed, 1967 insertions(+), 3443 deletions(-)
 create mode 100644 ext/NeuralPDETensorBoardLoggerExt.jl
 delete mode 100644 lib/NeuralPDELogging/LICENSE
 delete mode 100644 lib/NeuralPDELogging/Project.toml
 delete mode 100644 lib/NeuralPDELogging/src/NeuralPDELogging.jl
 delete mode 100644 lib/NeuralPDELogging/test/adaptive_loss_log_tests.jl
 delete mode 100644 lib/NeuralPDELogging/test/runtests.jl
 create mode 100644 src/eltype_matching.jl
 delete mode 100644 src/rode_solve.jl
 delete mode 100644 test/NNRODE_tests.jl
 create mode 100644 test/logging_tests.jl

diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml
index 9c79359112..320e0c0737 100644
--- a/.JuliaFormatter.toml
+++ b/.JuliaFormatter.toml
@@ -1,2 +1,3 @@
 style = "sciml"
-format_markdown = true
\ No newline at end of file
+format_markdown = true
+annotate_untyped_fields_with_any = false
diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
index a83997c38d..29a8d655a3 100644
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@@ -1,15 +1,15 @@
 steps:
-  - label: "GPU"
+  - label: "CUDA"
     plugins:
       - JuliaCI/julia#v1:
           version: "1"
       - JuliaCI/julia-test#v1:
-           coverage: false # 1000x slowdown
+           coverage: true
     agents:
       queue: "juliagpu"
       cuda: "*"
     env:
-      GROUP: 'GPU'
+      GROUP: 'CUDA'
       JULIA_PKG_SERVER: "" # it often struggles with our large artifacts
       # SECRET_CODECOV_TOKEN: "..."
     timeout_in_minutes: 240
diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml
index 8e1252862c..73494545f2 100644
--- a/.github/workflows/CompatHelper.yml
+++ b/.github/workflows/CompatHelper.yml
@@ -23,4 +23,4 @@ jobs:
       - name: CompatHelper.main()
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: julia -e 'using CompatHelper; CompatHelper.main(;subdirs=["", "docs", "lib/NeuralPDELogging"])'
+        run: julia -e 'using CompatHelper; CompatHelper.main(;subdirs=["", "docs"])'
diff --git a/.github/workflows/Downgrade.yml b/.github/workflows/Downgrade.yml
index d9473471ec..bcfab6b5d0 100644
--- a/.github/workflows/Downgrade.yml
+++ b/.github/workflows/Downgrade.yml
@@ -30,7 +30,7 @@ jobs:
           - NeuralAdapter
           - IntegroDiff
         version:
-          - "1"
+          - "1.10"
     steps:
       - uses: actions/checkout@v4
       - uses: julia-actions/setup-julia@v2
@@ -55,7 +55,7 @@ jobs:
           GROUP: ${{ matrix.group }}
       - uses: julia-actions/julia-processcoverage@v1
         with:
-          directories: src,lib/NeuralPDELogging/src
+          directories: src,ext
       - uses: codecov/codecov-action@v4
         with:
           files: lcov.info
diff --git a/.github/workflows/Tests.yml b/.github/workflows/Tests.yml
index a290993f27..b1b5ecd8f4 100644
--- a/.github/workflows/Tests.yml
+++ b/.github/workflows/Tests.yml
@@ -23,6 +23,8 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
+        version:
+          - "1.10"
         group:
           - "QA"
           - "ODEBPINN"
@@ -39,5 +41,6 @@ jobs:
     uses: "SciML/.github/.github/workflows/tests.yml@v1"
     with:
       group: "${{ matrix.group }}"
-      coverage-directories: "src,lib/NeuralPDELogging/src"
+      coverage-directories: "src,ext"
+      julia-version: "${{ matrix.version }}"
     secrets: "inherit"
diff --git a/Project.toml b/Project.toml
index 026a29ba72..21b49693df 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,97 +4,128 @@ authors = ["Chris Rackauckas <accounts@chrisrackauckas.com>"]
 version = "5.16.0"
 
 [deps]
+ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
 Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
 AdvancedHMC = "0bf59076-c3b1-5ca4-86bd-e02cd72cde3d"
 ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
 ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66"
+ConcreteStructs = "2569d6c7-a4a2-43d3-a901-331e8e4be471"
 Cubature = "667455a9-e2ce-5579-9412-b964f529a492"
-DiffEqNoiseProcess = "77a26b50-5914-5dd7-bc55-306e6241c503"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
 DomainSets = "5b8099bc-c8ec-5219-889f-1d9e522a28bf"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196"
 Integrals = "de52edbc-65ea-441a-8357-d3a637375a31"
+IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c"
 Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
+LuxCore = "bb33d45b-7691-41d6-9220-0943567d0623"
 MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d"
+MLDataDevices = "7e8f7934-dd98-4c1a-8fe8-92b47a384d40"
 ModelingToolkit = "961ee093-0014-501f-94e3-6117800e7a78"
 MonteCarloMeasurements = "0987c9cc-fe09-11e8-30f0-b96dd679fdca"
-Optim = "429524aa-4258-5aef-a3af-852621145aeb"
+Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2"
 Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba"
 OptimizationOptimisers = "42dfb2eb-d2b4-4451-abcd-913932933ac1"
+Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 QuasiMonteCarlo = "8a4e6c94-4038-4cdc-81c3-7e6ffdb2a71b"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+RecursiveArrayTools = "731186ca-8d62-57ce-b412-fbd966d074cd"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 RuntimeGeneratedFunctions = "7e49a35a-f44a-4d26-94aa-eba1b4ca6b47"
 SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+SymbolicIndexingInterface = "2efcf032-c050-4f8e-a9bb-153293bab1f5"
 SymbolicUtils = "d1185830-fcd6-423d-90d6-eec64667417b"
 Symbolics = "0c5d862f-8b57-4792-8d23-62f2024744c7"
-UnPack = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"
+WeightInitializers = "d49dbf32-c5c2-4618-8acc-27bb2598ef2d"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
+[weakdeps]
+TensorBoardLogger = "899adc3e-224a-11e9-021f-63837185c80f"
+
+[extensions]
+NeuralPDETensorBoardLoggerExt = "TensorBoardLogger"
+
 [compat]
+ADTypes = "1.9.0"
 Adapt = "4"
 AdvancedHMC = "0.6.1"
 Aqua = "0.8"
-ArrayInterface = "7.9"
-CUDA = "5.3"
+ArrayInterface = "7.11"
+CUDA = "5.5.2"
 ChainRulesCore = "1.24"
-ComponentArrays = "0.15.14"
+ComponentArrays = "0.15.16"
+ConcreteStructs = "0.2.3"
 Cubature = "1.5"
 DiffEqNoiseProcess = "5.20"
 Distributions = "0.25.107"
 DocStringExtensions = "0.9.3"
-DomainSets = "0.6, 0.7"
-Flux = "0.14.11"
+DomainSets = "0.7"
+ExplicitImports = "1.10.1"
+Flux = "0.14.22"
 ForwardDiff = "0.10.36"
-Functors = "0.4.10"
-Integrals = "4.4"
-LineSearches = "7.2"
-LinearAlgebra = "1"
+Functors = "0.4.12"
+Integrals = "4.5"
+IntervalSets = "0.7.10"
+LineSearches = "7.3"
+LinearAlgebra = "1.10"
 LogDensityProblems = "2"
-Lux = "0.5.58"
-LuxCUDA = "0.3.2"
+Lux = "1.1.0"
+LuxCUDA = "0.3.3"
+LuxCore = "1.0.1"
+LuxLib = "1.3.2"
 MCMCChains = "6"
-MethodOfLines = "0.11"
-ModelingToolkit = "9.9"
+MLDataDevices = "1.2.0"
+MethodOfLines = "0.11.6"
+ModelingToolkit = "9.46"
 MonteCarloMeasurements = "1.1"
-Optim = "1.7.8"
-Optimization = "3.24, 4"
-OptimizationOptimJL = "0.2.1"
-OptimizationOptimisers = "0.2.1, 0.3"
-OrdinaryDiffEq = "6.74"
-Pkg = "1"
+Optimisers = "0.3.3"
+Optimization = "4"
+OptimizationOptimJL = "0.4"
+OptimizationOptimisers = "0.3"
+OrdinaryDiffEq = "6.87"
+Pkg = "1.10"
+Printf = "1.10"
 QuasiMonteCarlo = "0.3.2"
 Random = "1"
+RecursiveArrayTools = "3.27.0"
 Reexport = "1.2"
 RuntimeGeneratedFunctions = "0.5.12"
 SafeTestsets = "0.1"
-SciMLBase = "2.28"
+SciMLBase = "2.56"
 Statistics = "1.10"
-SymbolicUtils = "1.5, 2, 3"
-Symbolics = "5.27.1, 6"
-Test = "1"
-UnPack = "1"
-Zygote = "0.6.69"
+StochasticDiffEq = "6.69.1"
+SymbolicIndexingInterface = "0.3.31"
+SymbolicUtils = "3.7.2"
+Symbolics = "6.14"
+TensorBoardLogger = "0.1.24"
+Test = "1.10"
+WeightInitializers = "1.0.3"
+Zygote = "0.6.71"
 julia = "1.10"
 
 [extras]
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+DiffEqNoiseProcess = "77a26b50-5914-5dd7-bc55-306e6241c503"
+ExplicitImports = "7d51a73a-1435-4ff3-83d9-f097790105c7"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 LineSearches = "d3d80556-e9d4-5f37-9878-2ab0fcc64255"
 LuxCUDA = "d0bbae9a-e099-4d5b-a835-1c6931763bda"
+LuxCore = "bb33d45b-7691-41d6-9220-0943567d0623"
+LuxLib = "82251201-b29d-42c6-8e01-566dec8acb11"
 MethodOfLines = "94925ecb-adb7-4558-8ed8-f975c56a0bf4"
 OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e"
 OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
+StochasticDiffEq = "789caeaf-c7a9-5a7d-9973-96adeb23e2a0"
+TensorBoardLogger = "899adc3e-224a-11e9-021f-63837185c80f"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Aqua", "Test", "CUDA", "SafeTestsets", "OptimizationOptimJL", "Pkg", "OrdinaryDiffEq", "LineSearches", "LuxCUDA", "Flux", "MethodOfLines"]
+test = ["Aqua", "CUDA", "DiffEqNoiseProcess", "ExplicitImports", "Flux", "LineSearches", "LuxCUDA", "LuxCore", "LuxLib", "MethodOfLines", "OptimizationOptimJL", "OrdinaryDiffEq", "Pkg", "SafeTestsets", "StochasticDiffEq", "TensorBoardLogger", "Test"]
diff --git a/docs/Project.toml b/docs/Project.toml
index 3e62098b0a..b8bbab2416 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -35,20 +35,20 @@ DiffEqBase = "6.148"
 Distributions = "0.25.107"
 Documenter = "1"
 DomainSets = "0.6, 0.7"
-Flux = "0.14.11"
+Flux = "0.14.17"
 Integrals = "4"
 LineSearches = "7.2"
-Lux = "0.5.22"
+Lux = "1"
 LuxCUDA = "0.3.2"
 MethodOfLines = "0.11"
 ModelingToolkit = "9.7"
 MonteCarloMeasurements = "1"
-NeuralPDE = "5.14"
-Optimization = "3.24, 4"
-OptimizationOptimJL = "0.2.1, 0.3, 0.4"
-OptimizationOptimisers = "0.2.1, 0.3"
-OptimizationPolyalgorithms = "0.2"
-OrdinaryDiffEq = "6.74"
+NeuralPDE = "5"
+Optimization = "4"
+OptimizationOptimJL = "0.4"
+OptimizationOptimisers = "0.3"
+OptimizationPolyalgorithms = "0.3"
+OrdinaryDiffEq = "6.87"
 Plots = "1.36"
 QuasiMonteCarlo = "0.3.2"
 Random = "1"
diff --git a/docs/src/examples/3rd.md b/docs/src/examples/3rd.md
index e64358e177..762b0b8d54 100644
--- a/docs/src/examples/3rd.md
+++ b/docs/src/examples/3rd.md
@@ -36,18 +36,18 @@ bcs = [u(0.0) ~ 0.0,
 domains = [x ∈ Interval(0.0, 1.0)]
 
 # Neural network
-chain = Lux.Chain(Dense(1, 8, Lux.σ), Dense(8, 1))
+chain = Chain(Dense(1, 8, σ), Dense(8, 1))
 
 discretization = PhysicsInformedNN(chain, QuasiRandomTraining(20))
 @named pde_system = PDESystem(eq, bcs, domains, [x], [u(x)])
 prob = discretize(pde_system, discretization)
 
 callback = function (p, l)
-    println("Current loss is: $l")
+    (p.iter % 500 == 0 || p.iter == 2000) && println("Current loss is: $l")
     return false
 end
 
-res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.01); maxiters = 2000)
+res = solve(prob, OptimizationOptimisers.Adam(0.01); maxiters = 2000, callback)
 phi = discretization.phi
 ```
 
diff --git a/docs/src/examples/complex.md b/docs/src/examples/complex.md
index ff9f1339a5..8d69dacc8a 100644
--- a/docs/src/examples/complex.md
+++ b/docs/src/examples/complex.md
@@ -5,10 +5,7 @@ NeuralPDE supports training PINNs with complex differential equations. This exam
 As the input to this neural network is time which is real, we need to initialize the parameters of the neural network with complex values for it to output and train with complex values.
 
 ```@example complex
-using Random, NeuralPDE
-using OrdinaryDiffEq
-using Lux, OptimizationOptimisers
-using Plots
+using Random, NeuralPDE, OrdinaryDiffEq, Lux, OptimizationOptimisers, Plots
 rng = Random.default_rng()
 Random.seed!(100)
 
@@ -30,11 +27,9 @@ parameters = [2.0, 0.0, 1.0]
 
 problem = ODEProblem(bloch_equations, u0, time_span, parameters)
 
-chain = Lux.Chain(
-    Lux.Dense(1, 16, tanh;
-        init_weight = (rng, a...) -> Lux.kaiming_normal(rng, ComplexF64, a...)),
-    Lux.Dense(
-        16, 4; init_weight = (rng, a...) -> Lux.kaiming_normal(rng, ComplexF64, a...))
+chain = Chain(
+    Dense(1, 16, tanh; init_weight = kaiming_normal(ComplexF64)),
+    Dense(16, 4; init_weight = kaiming_normal(ComplexF64))
 )
 ps, st = Lux.setup(rng, chain)
 
diff --git a/docs/src/examples/heterogeneous.md b/docs/src/examples/heterogeneous.md
index 069116dede..9f7d5fb1d8 100644
--- a/docs/src/examples/heterogeneous.md
+++ b/docs/src/examples/heterogeneous.md
@@ -31,11 +31,11 @@ domains = [x ∈ Interval(0.0, 1.0),
     y ∈ Interval(0.0, 1.0)]
 
 numhid = 3
-chains = [[Lux.Chain(Dense(1, numhid, Lux.σ), Dense(numhid, numhid, Lux.σ),
-               Dense(numhid, 1)) for i in 1:2]
-          [Lux.Chain(Dense(2, numhid, Lux.σ), Dense(numhid, numhid, Lux.σ),
-               Dense(numhid, 1)) for i in 1:2]]
-discretization = NeuralPDE.PhysicsInformedNN(chains, QuadratureTraining())
+chains = [[Chain(Dense(1, numhid, σ), Dense(numhid, numhid, σ), Dense(numhid, 1))
+           for i in 1:2]
+          [Chain(Dense(2, numhid, σ), Dense(numhid, numhid, σ), Dense(numhid, 1))
+           for i in 1:2]]
+discretization = PhysicsInformedNN(chains, QuadratureTraining())
 
 @named pde_system = PDESystem(eq, bcs, domains, [x, y], [p(x), q(y), r(x, y), s(y, x)])
 prob = SciMLBase.discretize(pde_system, discretization)
diff --git a/docs/src/examples/ks.md b/docs/src/examples/ks.md
index 55f75f825d..8afff0e29f 100644
--- a/docs/src/examples/ks.md
+++ b/docs/src/examples/ks.md
@@ -53,14 +53,13 @@ bcs = [u(x, 0) ~ u_analytic(x, 0),
     Dx(u(10, t)) ~ du(10, t)]
 
 # Space and time domains
-domains = [x ∈ Interval(-10.0, 10.0),
-    t ∈ Interval(0.0, 1.0)]
+domains = [x ∈ Interval(-10.0, 10.0), t ∈ Interval(0.0, 1.0)]
 # Discretization
 dx = 0.4;
 dt = 0.2;
 
 # Neural network
-chain = Lux.Chain(Dense(2, 12, Lux.σ), Dense(12, 12, Lux.σ), Dense(12, 1))
+chain = Chain(Dense(2, 12, σ), Dense(12, 12, σ), Dense(12, 1))
 
 discretization = PhysicsInformedNN(chain, GridTraining([dx, dt]))
 @named pde_system = PDESystem(eq, bcs, domains, [x, t], [u(x, t)])
@@ -72,7 +71,7 @@ callback = function (p, l)
 end
 
 opt = OptimizationOptimJL.BFGS()
-res = Optimization.solve(prob, opt; maxiters = 2000)
+res = Optimization.solve(prob, opt; maxiters = 2000, callback)
 phi = discretization.phi
 ```
 
diff --git a/docs/src/examples/linear_parabolic.md b/docs/src/examples/linear_parabolic.md
index c481114a20..6f454f1261 100644
--- a/docs/src/examples/linear_parabolic.md
+++ b/docs/src/examples/linear_parabolic.md
@@ -70,7 +70,7 @@ domains = [x ∈ Interval(0.0, 1.0),
 # Neural network
 input_ = length(domains)
 n = 15
-chain = [Lux.Chain(Dense(input_, n, Lux.σ), Dense(n, n, Lux.σ), Dense(n, 1)) for _ in 1:2]
+chain = [Chain(Dense(input_, n, σ), Dense(n, n, σ), Dense(n, 1)) for _ in 1:2]
 
 strategy = StochasticTraining(500)
 discretization = PhysicsInformedNN(chain, strategy)
@@ -82,18 +82,17 @@ sym_prob = symbolic_discretize(pdesystem, discretization)
 pde_inner_loss_functions = sym_prob.loss_functions.pde_loss_functions
 bcs_inner_loss_functions = sym_prob.loss_functions.bc_loss_functions
 
-global iteration = 0
 callback = function (p, l)
-    if iteration % 10 == 0
+    if p.iter % 500 == 0
+        println("iter: ", p.iter)
         println("loss: ", l)
         println("pde_losses: ", map(l_ -> l_(p.u), pde_inner_loss_functions))
         println("bcs_losses: ", map(l_ -> l_(p.u), bcs_inner_loss_functions))
     end
-    global iteration += 1
     return false
 end
 
-res = Optimization.solve(prob, OptimizationOptimisers.Adam(1e-2); maxiters = 10000)
+res = solve(prob, OptimizationOptimisers.Adam(1e-2); maxiters = 5000, callback)
 
 phi = discretization.phi
 
diff --git a/docs/src/examples/nonlinear_elliptic.md b/docs/src/examples/nonlinear_elliptic.md
index d7f8a58579..50e2ab3351 100644
--- a/docs/src/examples/nonlinear_elliptic.md
+++ b/docs/src/examples/nonlinear_elliptic.md
@@ -71,13 +71,12 @@ der_ = [Dy(u(x, y)) ~ Dyu(x, y),
 bcs__ = [bcs_; der_]
 
 # Space and time domains
-domains = [x ∈ Interval(0.0, 1.0),
-    y ∈ Interval(0.0, 1.0)]
+domains = [x ∈ Interval(0.0, 1.0), y ∈ Interval(0.0, 1.0)]
 
 # Neural network
 input_ = length(domains)
 n = 15
-chain = [Lux.Chain(Dense(input_, n, Lux.σ), Dense(n, n, Lux.σ), Dense(n, 1)) for _ in 1:6] # 1:number of @variables
+chain = [Chain(Dense(input_, n, σ), Dense(n, n, σ), Dense(n, 1)) for _ in 1:6] # 1:number of @variables
 
 strategy = GridTraining(0.01)
 discretization = PhysicsInformedNN(chain, strategy)
@@ -91,19 +90,17 @@ pde_inner_loss_functions = sym_prob.loss_functions.pde_loss_functions
 bcs_inner_loss_functions = sym_prob.loss_functions.bc_loss_functions[1:6]
 approx_derivative_loss_functions = sym_prob.loss_functions.bc_loss_functions[7:end]
 
-global iteration = 0
 callback = function (p, l)
-    if iteration % 10 == 0
+    if p.iter % 10 == 0
         println("loss: ", l)
         println("pde_losses: ", map(l_ -> l_(p.u), pde_inner_loss_functions))
         println("bcs_losses: ", map(l_ -> l_(p.u), bcs_inner_loss_functions))
         println("der_losses: ", map(l_ -> l_(p.u), approx_derivative_loss_functions))
     end
-    global iteration += 1
     return false
 end
 
-res = Optimization.solve(prob, BFGS(); maxiters = 100)
+res = solve(prob, BFGS(); maxiters = 100, callback)
 
 phi = discretization.phi
 
diff --git a/docs/src/examples/nonlinear_hyperbolic.md b/docs/src/examples/nonlinear_hyperbolic.md
index 08e2552c71..14688b8e9c 100644
--- a/docs/src/examples/nonlinear_hyperbolic.md
+++ b/docs/src/examples/nonlinear_hyperbolic.md
@@ -81,7 +81,7 @@ domains = [t ∈ Interval(0.0, 1.0),
 # Neural network
 input_ = length(domains)
 n = 15
-chain = [Lux.Chain(Dense(input_, n, Lux.σ), Dense(n, n, Lux.σ), Dense(n, 1)) for _ in 1:2]
+chain = [Chain(Dense(input_, n, σ), Dense(n, n, σ), Dense(n, 1)) for _ in 1:2]
 
 strategy = QuadratureTraining()
 discretization = PhysicsInformedNN(chain, strategy)
@@ -100,7 +100,7 @@ callback = function (p, l)
     return false
 end
 
-res = Optimization.solve(prob, BFGS(linesearch = BackTracking()); maxiters = 200)
+res = Optimization.solve(prob, BFGS(linesearch = BackTracking()); maxiters = 200, callback)
 
 phi = discretization.phi
 
diff --git a/docs/src/examples/wave.md b/docs/src/examples/wave.md
index d53e4df65a..8ef6d33085 100644
--- a/docs/src/examples/wave.md
+++ b/docs/src/examples/wave.md
@@ -42,7 +42,7 @@ domains = [t ∈ Interval(0.0, 1.0),
 dx = 0.1
 
 # Neural network
-chain = Lux.Chain(Dense(2, 16, Lux.σ), Dense(16, 16, Lux.σ), Dense(16, 1))
+chain = Chain(Dense(2, 16, σ), Dense(16, 16, σ), Dense(16, 1))
 discretization = PhysicsInformedNN(chain, GridTraining(dx))
 
 @named pde_system = PDESystem(eq, bcs, domains, [t, x], [u(t, x)])
@@ -55,7 +55,7 @@ end
 
 # optimizer
 opt = OptimizationOptimJL.BFGS()
-res = Optimization.solve(prob, opt; callback = callback, maxiters = 1200)
+res = Optimization.solve(prob, opt; callback, maxiters = 1200)
 phi = discretization.phi
 ```
 
@@ -138,11 +138,11 @@ domains = [t ∈ Interval(0.0, L),
 # Neural network
 inn = 25
 innd = 4
-chain = [[Lux.Chain(Dense(2, inn, Lux.tanh),
-              Dense(inn, inn, Lux.tanh),
-              Dense(inn, inn, Lux.tanh),
+chain = [[Chain(Dense(2, inn, tanh),
+              Dense(inn, inn, tanh),
+              Dense(inn, inn, tanh),
               Dense(inn, 1)) for _ in 1:3]
-         [Lux.Chain(Dense(2, innd, Lux.tanh), Dense(innd, 1)) for _ in 1:2]]
+         [Chain(Dense(2, innd, tanh), Dense(innd, 1)) for _ in 1:2]]
 
 strategy = GridTraining(0.02)
 discretization = PhysicsInformedNN(chain, strategy;)
diff --git a/docs/src/tutorials/Lotka_Volterra_BPINNs.md b/docs/src/tutorials/Lotka_Volterra_BPINNs.md
index a8a2bb0eb3..e7d62c926f 100644
--- a/docs/src/tutorials/Lotka_Volterra_BPINNs.md
+++ b/docs/src/tutorials/Lotka_Volterra_BPINNs.md
@@ -70,8 +70,7 @@ Let's define a PINN.
 
 ```@example bpinn
 # Neural Networks must have 2 outputs as u -> [dx,dy] in function lotka_volterra()
-chain = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),
-    Lux.Dense(6, 2))
+chain = Chain(Dense(1, 6, tanh), Dense(6, 6, tanh), Dense(6, 2))
 ```
 
 The dataset we generated can be passed for doing parameter estimation using provided priors in `param` keyword argument for [`BNNODE`](@ref).
diff --git a/docs/src/tutorials/dae.md b/docs/src/tutorials/dae.md
index 1f468caedd..29491e77ab 100644
--- a/docs/src/tutorials/dae.md
+++ b/docs/src/tutorials/dae.md
@@ -12,10 +12,7 @@ This tutorial is an introduction to using physics-informed neural networks (PINN
 Let's solve a simple DAE system:
 
 ```@example dae
-using NeuralPDE
-using Random
-using OrdinaryDiffEq, Statistics
-using Lux, OptimizationOptimisers
+using NeuralPDE, Random, OrdinaryDiffEq, Statistics, Lux, OptimizationOptimisers
 
 example = (du, u, p, t) -> [cos(2pi * t) - du[1], u[2] + cos(2pi * t) - du[2]]
 u₀ = [1.0, -1.0]
diff --git a/docs/src/tutorials/derivative_neural_network.md b/docs/src/tutorials/derivative_neural_network.md
index 3963be4308..bd26ce50fe 100644
--- a/docs/src/tutorials/derivative_neural_network.md
+++ b/docs/src/tutorials/derivative_neural_network.md
@@ -91,14 +91,13 @@ input_ = length(domains)
 n = 15
 chain = [Lux.Chain(Dense(input_, n, Lux.σ), Dense(n, n, Lux.σ), Dense(n, 1)) for _ in 1:7]
 
-training_strategy = NeuralPDE.QuadratureTraining(;
-    batch = 200, reltol = 1e-6, abstol = 1e-6)
-discretization = NeuralPDE.PhysicsInformedNN(chain, training_strategy)
+training_strategy = QuadratureTraining(; batch = 200, reltol = 1e-6, abstol = 1e-6)
+discretization = PhysicsInformedNN(chain, training_strategy)
 
 vars = [u1(t, x), u2(t, x), u3(t, x), Dxu1(t, x), Dtu1(t, x), Dxu2(t, x), Dtu2(t, x)]
 @named pdesystem = PDESystem(eqs_, bcs__, domains, [t, x], vars)
-prob = NeuralPDE.discretize(pdesystem, discretization)
-sym_prob = NeuralPDE.symbolic_discretize(pdesystem, discretization)
+prob = discretize(pdesystem, discretization)
+sym_prob = symbolic_discretize(pdesystem, discretization)
 
 pde_inner_loss_functions = sym_prob.loss_functions.pde_loss_functions
 bcs_inner_loss_functions = sym_prob.loss_functions.bc_loss_functions[1:7]
@@ -112,9 +111,9 @@ callback = function (p, l)
     return false
 end
 
-res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.01); maxiters = 2000)
+res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.01); maxiters = 2000, callback)
 prob = remake(prob, u0 = res.u)
-res = Optimization.solve(prob, LBFGS(linesearch = BackTracking()); maxiters = 200)
+res = Optimization.solve(prob, LBFGS(linesearch = BackTracking()); maxiters = 200, callback)
 
 phi = discretization.phi
 ```
diff --git a/docs/src/tutorials/dgm.md b/docs/src/tutorials/dgm.md
index a769795eff..f684d419c5 100644
--- a/docs/src/tutorials/dgm.md
+++ b/docs/src/tutorials/dgm.md
@@ -53,7 +53,6 @@ u(t, 1) & = 0
 ```@example dgm
 using NeuralPDE
 using ModelingToolkit, Optimization, OptimizationOptimisers
-using Lux: tanh, identity
 using Distributions
 using ModelingToolkit: Interval, infimum, supremum
 using MethodOfLines, OrdinaryDiffEq
@@ -95,18 +94,15 @@ strategy = QuasiRandomTraining(256, minibatch = 32)
 discretization = DeepGalerkin(2, 1, 50, 5, tanh, tanh, identity, strategy)
 @named pde_system = PDESystem(eq, bcs, domains, [t, x], [u(t, x)])
 prob = discretize(pde_system, discretization)
-global iter = 0
+
 callback = function (p, l)
-    global iter += 1
-    if iter % 20 == 0
-        println("$iter => $l")
-    end
+    (p.iter % 20 == 0) && println("$(p.iter) => $l")
     return false
 end
 
-res = Optimization.solve(prob, Adam(0.1); maxiters = 100)
+res = solve(prob, Adam(0.1); maxiters = 100)
 prob = remake(prob, u0 = res.u)
-res = Optimization.solve(prob, Adam(0.01); maxiters = 500)
+res = solve(prob, Adam(0.01); maxiters = 500)
 phi = discretization.phi
 
 u_predict = [first(phi([t, x], res.minimizer)) for t in ts, x in xs]
diff --git a/docs/src/tutorials/gpu.md b/docs/src/tutorials/gpu.md
index 82a07dceb2..b1f2923471 100644
--- a/docs/src/tutorials/gpu.md
+++ b/docs/src/tutorials/gpu.md
@@ -33,11 +33,8 @@ using the `gpu` function on the initial parameters, like:
 using Lux, LuxCUDA, ComponentArrays, Random
 const gpud = gpu_device()
 inner = 25
-chain = Chain(Dense(3, inner, Lux.σ),
-    Dense(inner, inner, Lux.σ),
-    Dense(inner, inner, Lux.σ),
-    Dense(inner, inner, Lux.σ),
-    Dense(inner, 1))
+chain = Chain(Dense(3, inner, σ), Dense(inner, inner, σ), Dense(inner, inner, σ),
+    Dense(inner, inner, σ), Dense(inner, 1))
 ps = Lux.setup(Random.default_rng(), chain)[1]
 ps = ps |> ComponentArray |> gpud .|> Float64
 ```
@@ -82,18 +79,13 @@ domains = [t ∈ Interval(t_min, t_max),
 
 # Neural network
 inner = 25
-chain = Chain(Dense(3, inner, Lux.σ),
-    Dense(inner, inner, Lux.σ),
-    Dense(inner, inner, Lux.σ),
-    Dense(inner, inner, Lux.σ),
-    Dense(inner, 1))
+chain = Chain(Dense(3, inner, σ), Dense(inner, inner, σ), Dense(inner, inner, σ),
+    Dense(inner, inner, σ), Dense(inner, 1))
 
 strategy = QuasiRandomTraining(100)
 ps = Lux.setup(Random.default_rng(), chain)[1]
 ps = ps |> ComponentArray |> gpud .|> Float64
-discretization = PhysicsInformedNN(chain,
-    strategy,
-    init_params = ps)
+discretization = PhysicsInformedNN(chain, strategy; init_params = ps)
 
 @named pde_system = PDESystem(eq, bcs, domains, [t, x, y], [u(t, x, y)])
 prob = discretize(pde_system, discretization)
diff --git a/docs/src/tutorials/low_level.md b/docs/src/tutorials/low_level.md
index 90c75de303..4f7a232654 100644
--- a/docs/src/tutorials/low_level.md
+++ b/docs/src/tutorials/low_level.md
@@ -36,8 +36,8 @@ domains = [t ∈ Interval(0.0, 1.0),
     x ∈ Interval(-1.0, 1.0)]
 
 # Neural network
-chain = Lux.Chain(Dense(2, 16, Lux.σ), Dense(16, 16, Lux.σ), Dense(16, 1))
-strategy = NeuralPDE.QuadratureTraining(; abstol = 1e-6, reltol = 1e-6, batch = 200)
+chain = Chain(Dense(2, 16, σ), Dense(16, 16, σ), Dense(16, 1))
+strategy = QuadratureTraining(; abstol = 1e-6, reltol = 1e-6, batch = 200)
 
 indvars = [t, x]
 depvars = [u(t, x)]
@@ -60,14 +60,12 @@ end
 
 loss_functions = [pde_loss_functions; bc_loss_functions]
 
-function loss_function(θ, p)
-    sum(map(l -> l(θ), loss_functions))
-end
+loss_function(θ, p) = sum(map(l -> l(θ), loss_functions))
 
-f_ = OptimizationFunction(loss_function, Optimization.AutoZygote())
-prob = Optimization.OptimizationProblem(f_, sym_prob.flat_init_params)
+f_ = OptimizationFunction(loss_function, AutoZygote())
+prob = OptimizationProblem(f_, sym_prob.flat_init_params)
 
-res = Optimization.solve(prob, BFGS(linesearch = BackTracking()); maxiters = 3000)
+res = solve(prob, BFGS(linesearch = BackTracking()); maxiters = 3000)
 ```
 
 And some analysis:
diff --git a/docs/src/tutorials/low_level_2.md b/docs/src/tutorials/low_level_2.md
index 381026ab67..3a3b008c27 100644
--- a/docs/src/tutorials/low_level_2.md
+++ b/docs/src/tutorials/low_level_2.md
@@ -27,7 +27,7 @@ where $\theta = t - x/2$ and with initial and boundary conditions:
 With Bayesian Physics-Informed Neural Networks, here is an example of using `BayesianPINN` discretization with `ahmc_bayesian_pinn_pde` :
 
 ```@example low_level_2
-using NeuralPDE, Flux, Lux, ModelingToolkit, LinearAlgebra, AdvancedHMC
+using NeuralPDE, Lux, ModelingToolkit, LinearAlgebra, AdvancedHMC
 import ModelingToolkit: Interval, infimum, supremum, Distributions
 using Plots, MonteCarloMeasurements
 
@@ -102,9 +102,7 @@ plot!(noisydataset[1][:, 2], noisydataset[1][:, 1])
 
 ```@example low_level_2
 # Neural network
-chain = Lux.Chain(Lux.Dense(2, 8, Lux.tanh),
-    Lux.Dense(8, 8, Lux.tanh),
-    Lux.Dense(8, 1))
+chain = Chain(Dense(2, 8, tanh), Dense(8, 8, tanh), Dense(8, 1))
 
 discretization = NeuralPDE.BayesianPINN([chain],
     GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset, nothing])
diff --git a/docs/src/tutorials/neural_adapter.md b/docs/src/tutorials/neural_adapter.md
index a2399c7860..bcff48fa36 100644
--- a/docs/src/tutorials/neural_adapter.md
+++ b/docs/src/tutorials/neural_adapter.md
@@ -60,7 +60,7 @@ chain2 = Lux.Chain(Dense(2, inner_, af),
     Dense(inner_, inner_, af),
     Dense(inner_, 1))
 initp, st = Lux.setup(Random.default_rng(), chain2)
-init_params2 = Float64.(ComponentArrays.ComponentArray(initp))
+init_params2 = Float64.(ComponentArray(initp))
 
 # the rule by which the training will take place is described here in loss function
 function loss(cord, θ)
@@ -226,7 +226,7 @@ chain2 = Lux.Chain(Dense(2, inner_, af),
     Dense(inner_, 1))
 
 initp, st = Lux.setup(Random.default_rng(), chain2)
-init_params2 = Float64.(ComponentArrays.ComponentArray(initp))
+init_params2 = Float64.(ComponentArray(initp))
 
 @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
 
diff --git a/ext/NeuralPDETensorBoardLoggerExt.jl b/ext/NeuralPDETensorBoardLoggerExt.jl
new file mode 100644
index 0000000000..4115a427f3
--- /dev/null
+++ b/ext/NeuralPDETensorBoardLoggerExt.jl
@@ -0,0 +1,19 @@
+module NeuralPDETensorBoardLoggerExt
+
+using NeuralPDE: NeuralPDE
+using TensorBoardLogger: TBLogger, log_value
+
+function NeuralPDE.logvector(logger::TBLogger, vector::AbstractVector{<:Real},
+        name::AbstractString, step::Integer)
+    foreach(enumerate(vector)) do (j, v)
+        log_value(logger, "$(name)/$(j)", v; step)
+    end
+end
+
+function NeuralPDE.logscalar(logger::TBLogger, scalar::Real, name::AbstractString,
+        step::Integer)
+    log_value(logger, "$(name)", scalar; step)
+    return nothing
+end
+
+end
diff --git a/lib/NeuralPDELogging/LICENSE b/lib/NeuralPDELogging/LICENSE
deleted file mode 100644
index cc31a9f503..0000000000
--- a/lib/NeuralPDELogging/LICENSE
+++ /dev/null
@@ -1,9 +0,0 @@
-The NeuralPDE.jl package is licensed under the MIT "Expat" License:
-
-Copyright (c) 2017: ChrisRackauckas.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/lib/NeuralPDELogging/Project.toml b/lib/NeuralPDELogging/Project.toml
deleted file mode 100644
index b2fd8d70bc..0000000000
--- a/lib/NeuralPDELogging/Project.toml
+++ /dev/null
@@ -1,27 +0,0 @@
-name = "NeuralPDELogging"
-uuid = "7c138fc3-9327-4ab8-b9a3-c864f3475625"
-authors = ["Zoe McCarthy <zoemccarthy12@gmail.com>"]
-version = "0.1.0"
-
-[deps]
-Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
-NeuralPDE = "315f7962-48a3-4962-8226-d0f33b1235f0"
-TensorBoardLogger = "899adc3e-224a-11e9-021f-63837185c80f"
-
-[compat]
-NeuralPDE = "5"
-TensorBoardLogger = "0.1"
-julia = "1.6"
-
-[extras]
-Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
-Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba"
-OptimizationOptimisers = "42dfb2eb-d2b4-4451-abcd-913932933ac1"
-ModelingToolkit = "961ee093-0014-501f-94e3-6117800e7a78"
-Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
-Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
-SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-
-[targets]
-test = ["Test", "SafeTestsets", "Pkg", "Lux", "Optimization", "OptimizationOptimisers", "ModelingToolkit", "Random"]
diff --git a/lib/NeuralPDELogging/src/NeuralPDELogging.jl b/lib/NeuralPDELogging/src/NeuralPDELogging.jl
deleted file mode 100644
index 940dbe51a4..0000000000
--- a/lib/NeuralPDELogging/src/NeuralPDELogging.jl
+++ /dev/null
@@ -1,24 +0,0 @@
-module NeuralPDELogging
-
-using NeuralPDE
-using TensorBoardLogger
-
-"""This function overrides the empty function in NeuralPDE in order to use TensorBoardLogger in that package
-This is light type piracy but it should be alright since this is a subpackage of NeuralPDE"""
-function NeuralPDE.logvector(logger::TBLogger, vector::AbstractVector{R},
-        name::AbstractString, step::Integer) where {R <: Real}
-    for j in 1:length(vector)
-        log_value(logger, "$(name)/$(j)", vector[j], step = step)
-    end
-    nothing
-end
-
-"""This function overrides the empty function in NeuralPDE in order to use TensorBoardLogger in that package.  
-This is light type piracy but it should be alright since this is a subpackage of NeuralPDE"""
-function NeuralPDE.logscalar(logger::TBLogger, scalar::R, name::AbstractString,
-        step::Integer) where {R <: Real}
-    log_value(logger, "$(name)", scalar, step = step)
-    nothing
-end
-
-end
diff --git a/lib/NeuralPDELogging/test/adaptive_loss_log_tests.jl b/lib/NeuralPDELogging/test/adaptive_loss_log_tests.jl
deleted file mode 100644
index b037381afe..0000000000
--- a/lib/NeuralPDELogging/test/adaptive_loss_log_tests.jl
+++ /dev/null
@@ -1,135 +0,0 @@
-@info "adaptive_loss_logging_tests"
-using Test, NeuralPDE
-using Optimization, OptimizationOptimisers
-import ModelingToolkit: Interval, infimum, supremum
-using Random, Lux
-@info "Starting Soon!"
-
-nonadaptive_loss = NeuralPDE.NonAdaptiveLoss(pde_loss_weights = 1, bc_loss_weights = 1)
-gradnormadaptive_loss = NeuralPDE.GradientScaleAdaptiveLoss(100, pde_loss_weights = 1e3,
-    bc_loss_weights = 1)
-adaptive_loss = NeuralPDE.MiniMaxAdaptiveLoss(100; pde_loss_weights = 1,
-    bc_loss_weights = 1)
-adaptive_losses = [nonadaptive_loss, gradnormadaptive_loss, adaptive_loss]
-maxiters = 800
-seed = 60
-
-## 2D Poisson equation
-function test_2d_poisson_equation_adaptive_loss(adaptive_loss, run, outdir, haslogger;
-        seed = 60, maxiters = 800)
-    logdir = joinpath(outdir, string(run))
-    if haslogger
-        logger = TBLogger(logdir)
-    else
-        logger = nothing
-    end
-    Random.seed!(seed)
-    hid = 40
-    chain_ = Lux.Chain(Dense(2, hid, Lux.σ), Dense(hid, hid, Lux.σ),
-        Dense(hid, 1))
-    strategy_ = NeuralPDE.StochasticTraining(256)
-    @info "adaptive reweighting test logdir: $(logdir), maxiters: $(maxiters), 2D Poisson equation, adaptive_loss: $(nameof(typeof(adaptive_loss))) "
-    @parameters x y
-    @variables u(..)
-    Dxx = Differential(x)^2
-    Dyy = Differential(y)^2
-
-    # 2D PDE
-    eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y)
-
-    # Initial and boundary conditions
-    bcs = [u(0, y) ~ 0.0, u(1, y) ~ -sin(pi * 1) * sin(pi * y),
-        u(x, 0) ~ 0.0, u(x, 1) ~ -sin(pi * x) * sin(pi * 1)]
-    # Space and time domains
-    domains = [x ∈ Interval(0.0, 1.0),
-        y ∈ Interval(0.0, 1.0)]
-
-    iteration = [0]
-    discretization = NeuralPDE.PhysicsInformedNN(chain_,
-        strategy_;
-        adaptive_loss = adaptive_loss,
-        logger = logger,
-        iteration = iteration)
-
-    @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
-    prob = NeuralPDE.discretize(pde_system, discretization)
-    phi = discretization.phi
-    sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization)
-
-    xs, ys = [infimum(d.domain):0.01:supremum(d.domain) for d in domains]
-    analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2)
-    u_real = reshape([analytic_sol_func(x, y) for x in xs for y in ys],
-        (length(xs), length(ys)))
-
-    callback = function (p, l)
-        iteration[1] += 1
-        if iteration[1] % 100 == 0
-            @info "Current loss is: $l, iteration is $(iteration[1])"
-        end
-        if haslogger
-            log_value(logger, "outer_error/loss", l, step = iteration[1])
-            if iteration[1] % 30 == 0
-                u_predict = reshape([first(phi([x, y], p.u)) for x in xs for y in ys],
-                    (length(xs), length(ys)))
-                diff_u = abs.(u_predict .- u_real)
-                total_diff = sum(diff_u)
-                log_value(logger, "outer_error/total_diff", total_diff, step = iteration[1])
-                total_u = sum(abs.(u_real))
-                total_diff_rel = total_diff / total_u
-                log_value(logger, "outer_error/total_diff_rel", total_diff_rel,
-                    step = iteration[1])
-                total_diff_sq = sum(diff_u .^ 2)
-                log_value(logger, "outer_error/total_diff_sq", total_diff_sq,
-                    step = iteration[1])
-            end
-        end
-        return false
-    end
-    res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.03); maxiters = maxiters,
-        callback = callback)
-
-    u_predict = reshape([first(phi([x, y], res.u)) for x in xs for y in ys],
-        (length(xs), length(ys)))
-    diff_u = abs.(u_predict .- u_real)
-    total_diff = sum(diff_u)
-    total_u = sum(abs.(u_real))
-    total_diff_rel = total_diff / total_u
-
-    #p1 = plot(xs, ys, u_real, linetype=:contourf,title = "analytic");
-    #p2 = plot(xs, ys, u_predict, linetype=:contourf,title = "predict");
-    #p3 = plot(xs, ys, diff_u,linetype=:contourf,title = "error");
-    #(plot=plot(p1,p2,p3), error=total_diff, total_diff_rel=total_diff_rel)
-    (error = total_diff, total_diff_rel = total_diff_rel)
-end
-
-possible_logger_dir = mktempdir()
-if ENV["LOG_SETTING"] == "NoImport"
-    haslogger = false
-    expected_log_folders = 0
-elseif ENV["LOG_SETTING"] == "ImportNoUse"
-    using NeuralPDELogging
-    haslogger = false
-    expected_log_folders = 0
-elseif ENV["LOG_SETTING"] == "ImportUse"
-    using NeuralPDELogging
-    using TensorBoardLogger
-    haslogger = true
-    expected_log_folders = 3
-end
-
-@info "has logger: $(haslogger), expected log folders: $(expected_log_folders)"
-
-function test_2d_poisson_equation_adaptive_loss_run_seediters(adaptive_loss, run)
-    test_2d_poisson_equation_adaptive_loss(adaptive_loss, run, possible_logger_dir,
-        haslogger; seed = seed, maxiters = maxiters)
-end
-error_results = map(test_2d_poisson_equation_adaptive_loss_run_seediters, adaptive_losses,
-    1:length(adaptive_losses))
-
-@test length(readdir(possible_logger_dir)) == expected_log_folders
-if expected_log_folders > 0
-    @info "dirs at $(possible_logger_dir): $(string(readdir(possible_logger_dir)))"
-    for logdir in readdir(possible_logger_dir)
-        @test length(readdir(joinpath(possible_logger_dir, logdir))) > 0
-    end
-end
diff --git a/lib/NeuralPDELogging/test/runtests.jl b/lib/NeuralPDELogging/test/runtests.jl
deleted file mode 100644
index 2f4d45864e..0000000000
--- a/lib/NeuralPDELogging/test/runtests.jl
+++ /dev/null
@@ -1,45 +0,0 @@
-using Pkg
-using SafeTestsets
-
-const GROUP = get(ENV, "GROUP", "All")
-
-const is_APPVEYOR = Sys.iswindows() && haskey(ENV, "APPVEYOR")
-
-const is_TRAVIS = haskey(ENV, "TRAVIS")
-
-is_CI = haskey(ENV, "CI")
-
-@time begin
-    if GROUP == "All" || GROUP == "Logging"
-        @time @safetestset "AdaptiveLossLogNoImport" begin
-            using Pkg
-            neuralpde_dir = dirname(abspath(joinpath(@__DIR__, "..", "..", "..")))
-            @info "loading neuralpde package at : $(neuralpde_dir)"
-            neuralpde = Pkg.PackageSpec(path = neuralpde_dir)
-            Pkg.develop(neuralpde)
-            @info "making sure that there are no logs without having imported NeuralPDELogging"
-            ENV["LOG_SETTING"] = "NoImport"
-            include("adaptive_loss_log_tests.jl")
-        end
-        @time @safetestset "AdaptiveLossLogImportNoUse" begin
-            using Pkg
-            neuralpde_dir = dirname(abspath(joinpath(@__DIR__, "..", "..", "..")))
-            @info "loading neuralpde package at : $(neuralpde_dir)"
-            neuralpde = Pkg.PackageSpec(path = neuralpde_dir)
-            Pkg.develop(neuralpde)
-            @info "making sure that there are still no logs now that we have imported NeuralPDELogging"
-            ENV["LOG_SETTING"] = "ImportNoUse"
-            include("adaptive_loss_log_tests.jl")
-        end
-        @time @safetestset "AdaptiveLossLogImportUse" begin
-            using Pkg
-            neuralpde_dir = dirname(abspath(joinpath(@__DIR__, "..", "..", "..")))
-            @info "loading neuralpde package at : $(neuralpde_dir)"
-            neuralpde = Pkg.PackageSpec(path = neuralpde_dir)
-            Pkg.develop(neuralpde)
-            ENV["LOG_SETTING"] = "ImportUse"
-            @info "making sure that logs are generated now if we use a logger"
-            include("adaptive_loss_log_tests.jl")
-        end
-    end
-end
diff --git a/src/BPINN_ode.jl b/src/BPINN_ode.jl
index 9960006b18..f65f1d659e 100644
--- a/src/BPINN_ode.jl
+++ b/src/BPINN_ode.jl
@@ -1,16 +1,18 @@
 # HIGH level API for BPINN ODE solver
 
 """
-    BNNODE(chain, Kernel = HMC; strategy = nothing, draw_samples = 2000,
-                        priorsNNw = (0.0, 2.0), param = [nothing], l2std = [0.05],
-                        phystd = [0.05], dataset = [nothing], physdt = 1 / 20.0,
-                        MCMCargs = (n_leapfrog=30), nchains = 1, init_params = nothing,
-                        Adaptorkwargs = (Adaptor = StanHMCAdaptor, targetacceptancerate = 0.8, Metric = DiagEuclideanMetric),
-                        Integratorkwargs = (Integrator = Leapfrog,), autodiff = false,
-                        progress = false, verbose = false)
-
-Algorithm for solving ordinary differential equations using a Bayesian neural network. This is a specialization
-of the physics-informed neural network which is used as a solver for a standard `ODEProblem`.
+    BNNODE(chain, kernel = HMC; strategy = nothing, draw_samples = 2000,
+           priorsNNw = (0.0, 2.0), param = [nothing], l2std = [0.05],
+           phystd = [0.05], dataset = [nothing], physdt = 1 / 20.0,
+           MCMCargs = (; n_leapfrog=30), nchains = 1, init_params = nothing,
+           Adaptorkwargs = (; Adaptor = StanHMCAdaptor, targetacceptancerate = 0.8,
+                              Metric = DiagEuclideanMetric),
+           Integratorkwargs = (Integrator = Leapfrog,), autodiff = false,
+           progress = false, verbose = false)
+
+Algorithm for solving ordinary differential equations using a Bayesian neural network. This
+is a specialization of the physics-informed neural network which is used as a solver for a
+standard `ODEProblem`.
 
 !!! warn
 
@@ -20,10 +22,11 @@ of the physics-informed neural network which is used as a solver for a standard
 
 ## Positional Arguments
 
-* `chain`: A neural network architecture, defined as a `Lux.AbstractExplicitLayer`.
-* `Kernel`: Choice of MCMC Sampling Algorithm. Defaults to `AdvancedHMC.HMC`
+* `chain`: A neural network architecture, defined as a `Lux.AbstractLuxLayer`.
+* `kernel`: Choice of MCMC Sampling Algorithm. Defaults to `AdvancedHMC.HMC`
 
 ## Keyword Arguments
+
 (refer `NeuralPDE.ahmc_bayesian_pinn_ode` keyword arguments.)
 
 ## Example
@@ -44,18 +47,15 @@ dataset = [x̂, time]
 
 chainlux = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh), Lux.Dense(6, 1))
 
-alg = BNNODE(chainlux, draw_samples = 2000,
-                       l2std = [0.05], phystd = [0.05],
-                       priorsNNw = (0.0, 3.0), progress = true)
+alg = BNNODE(chainlux; draw_samples = 2000, l2std = [0.05], phystd = [0.05],
+             priorsNNw = (0.0, 3.0), progress = true)
 
 sol_lux = solve(prob, alg)
 
 # with parameter estimation
-alg = BNNODE(chainlux,dataset = dataset,
-                draw_samples = 2000,l2std = [0.05],
-                phystd = [0.05],priorsNNw = (0.0, 10.0),
-                param = [Normal(6.5, 0.5), Normal(-3, 0.5)],
-                progress = true)
+alg = BNNODE(chainlux; dataset, draw_samples = 2000, l2std = [0.05], phystd = [0.05],
+             priorsNNw = (0.0, 10.0), param = [Normal(6.5, 0.5), Normal(-3, 0.5)],
+             progress = true)
 
 sol_lux_pestim = solve(prob, alg)
 ```
@@ -71,61 +71,48 @@ is an accurate interpolation (up to the neural network training result). In addi
 
 ## References
 
-Liu Yanga, Xuhui Menga, George Em Karniadakis. "B-PINNs: Bayesian Physics-Informed Neural Networks for
-Forward and Inverse PDE Problems with Noisy Data".
+Liu Yanga, Xuhui Menga, George Em Karniadakis. "B-PINNs: Bayesian Physics-Informed Neural
+Networks for Forward and Inverse PDE Problems with Noisy Data".
 
 Kevin Linka, Amelie Schäfer, Xuhui Meng, Zongren Zou, George Em Karniadakis, Ellen Kuhl
 "Bayesian Physics Informed Neural Networks for real-world nonlinear dynamical systems".
 """
-struct BNNODE{C, K, IT <: NamedTuple,
-    A <: NamedTuple, H <: NamedTuple,
-    ST <: Union{Nothing, AbstractTrainingStrategy},
-    I <: Union{Nothing, <:NamedTuple, Vector{<:AbstractFloat}},
-    P <: Union{Nothing, Vector{<:Distribution}},
-    D <:
-    Union{Vector{Nothing}, Vector{<:Vector{<:AbstractFloat}}}} <:
-       NeuralPDEAlgorithm
-    chain::C
-    Kernel::K
-    strategy::ST
-    draw_samples::Int64
+@concrete struct BNNODE <: NeuralPDEAlgorithm
+    chain <: AbstractLuxLayer
+    kernel
+    strategy <: Union{Nothing, AbstractTrainingStrategy}
+    draw_samples::Int
     priorsNNw::Tuple{Float64, Float64}
-    param::P
+    param <: Union{Nothing, Vector{<:Distribution}}
     l2std::Vector{Float64}
     phystd::Vector{Float64}
-    dataset::D
+    dataset <: Union{Vector{Nothing}, Vector{<:Vector{<:AbstractFloat}}}
     physdt::Float64
-    MCMCkwargs::H
-    nchains::Int64
-    init_params::I
-    Adaptorkwargs::A
-    Integratorkwargs::IT
-    numensemble::Int64
+    MCMCkwargs <: NamedTuple
+    nchains::Int
+    init_params <: Union{Nothing, <:NamedTuple, Vector{<:AbstractFloat}}
+    Adaptorkwargs <: NamedTuple
+    Integratorkwargs <: NamedTuple
+    numensemble::Int
     estim_collocate::Bool
     autodiff::Bool
     progress::Bool
     verbose::Bool
 end
-function BNNODE(chain, Kernel = HMC; strategy = nothing, draw_samples = 2000,
+
+function BNNODE(chain, kernel = HMC; strategy = nothing, draw_samples = 2000,
         priorsNNw = (0.0, 2.0), param = nothing, l2std = [0.05], phystd = [0.05],
-        dataset = [nothing], physdt = 1 / 20.0, MCMCkwargs = (n_leapfrog = 30,), nchains = 1,
-        init_params = nothing,
+        dataset = [nothing], physdt = 1 / 20.0, MCMCkwargs = (n_leapfrog = 30,),
+        nchains = 1, init_params = nothing,
         Adaptorkwargs = (Adaptor = StanHMCAdaptor,
-            Metric = DiagEuclideanMetric,
-            targetacceptancerate = 0.8),
+            Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
         Integratorkwargs = (Integrator = Leapfrog,),
         numensemble = floor(Int, draw_samples / 3),
-        estim_collocate = false,
-        autodiff = false, progress = false, verbose = false)
-    !(chain isa Lux.AbstractExplicitLayer) &&
-        (chain = adapt(FromFluxAdaptor(false, false), chain))
-    BNNODE(chain, Kernel, strategy,
-        draw_samples, priorsNNw, param, l2std,
-        phystd, dataset, physdt, MCMCkwargs,
-        nchains, init_params,
-        Adaptorkwargs, Integratorkwargs,
-        numensemble, estim_collocate,
-        autodiff, progress, verbose)
+        estim_collocate = false, autodiff = false, progress = false, verbose = false)
+    chain isa AbstractLuxLayer || (chain = FromFluxAdaptor()(chain))
+    return BNNODE(chain, kernel, strategy, draw_samples, priorsNNw, param, l2std, phystd,
+        dataset, physdt, MCMCkwargs, nchains, init_params, Adaptorkwargs,
+        Integratorkwargs, numensemble, estim_collocate, autodiff, progress, verbose)
 end
 
 """
@@ -143,98 +130,59 @@ Contains `ahmc_bayesian_pinn_ode()` function output:
     - step_size
     - nom_step_size
 """
-struct BPINNstats{MC, S, ST}
-    mcmc_chain::MC
-    samples::S
-    statistics::ST
+@concrete struct BPINNstats
+    mcmc_chain
+    samples
+    statistics
 end
 
 """
-BPINN Solution contains the original solution from AdvancedHMC.jl sampling (BPINNstats contains fields related to that).
+BPINN Solution contains the original solution from AdvancedHMC.jl sampling (BPINNstats
+contains fields related to that).
 
-1. `ensemblesol` is the Probabilistic Estimate (MonteCarloMeasurements.jl Particles type) of Ensemble solution from All Neural Network's (made using all sampled parameters) output's.
+1. `ensemblesol` is the Probabilistic Estimate (MonteCarloMeasurements.jl Particles type) of
+   Ensemble solution from All Neural Network's (made using all sampled parameters) output's.
 2. `estimated_nn_params` - Probabilistic Estimate of NN params from sampled weights, biases.
-3. `estimated_de_params` - Probabilistic Estimate of DE params from sampled unknown DE parameters.
+3. `estimated_de_params` - Probabilistic Estimate of DE params from sampled unknown DE
+   parameters.
 """
-struct BPINNsolution{O <: BPINNstats, E, NP, OP, P}
-    original::O
-    ensemblesol::E
-    estimated_nn_params::NP
-    estimated_de_params::OP
-    timepoints::P
-
-    function BPINNsolution(original,
-            ensemblesol,
-            estimated_nn_params,
-            estimated_de_params,
-            timepoints)
-        new{typeof(original), typeof(ensemblesol), typeof(estimated_nn_params),
-            typeof(estimated_de_params), typeof(timepoints)}(
-            original, ensemblesol, estimated_nn_params,
-            estimated_de_params, timepoints)
-    end
+@concrete struct BPINNsolution
+    original <: BPINNstats
+    ensemblesol
+    estimated_nn_params
+    estimated_de_params
+    timepoints
 end
 
-function SciMLBase.__solve(prob::SciMLBase.ODEProblem,
-        alg::BNNODE,
-        args...;
-        dt = nothing,
-        timeseries_errors = true,
-        save_everystep = true,
-        adaptive = false,
-        abstol = 1.0f-6,
-        reltol = 1.0f-3,
-        verbose = false,
-        saveat = 1 / 50.0,
-        maxiters = nothing,
-        numensemble = floor(Int, alg.draw_samples / 3))
-    @unpack chain, l2std, phystd, param, priorsNNw, Kernel, strategy,
-    draw_samples, dataset, init_params,
-    nchains, physdt, Adaptorkwargs, Integratorkwargs,
-    MCMCkwargs, numensemble, estim_collocate, autodiff, progress,
-    verbose = alg
+function SciMLBase.__solve(prob::SciMLBase.ODEProblem, alg::BNNODE, args...; dt = nothing,
+        timeseries_errors = true, save_everystep = true, adaptive = false,
+        abstol = 1.0f-6, reltol = 1.0f-3, verbose = false, saveat = 1 / 50.0,
+        maxiters = nothing, numensemble = floor(Int, alg.draw_samples / 3))
+    (; chain, param, strategy, draw_samples, numensemble, verbose) = alg
 
     # ahmc_bayesian_pinn_ode needs param=[] for easier vcat operation for full vector of parameters
     param = param === nothing ? [] : param
     strategy = strategy === nothing ? GridTraining : strategy
 
-    if draw_samples < 0
-        throw(error("Number of samples to be drawn has to be >=0."))
-    end
+    @assert alg.draw_samples≥0 "Number of samples to be drawn has to be >=0."
 
-    mcmcchain, samples, statistics = ahmc_bayesian_pinn_ode(prob, chain,
-        strategy = strategy, dataset = dataset,
-        draw_samples = draw_samples,
-        init_params = init_params,
-        physdt = physdt, l2std = l2std,
-        phystd = phystd,
-        priorsNNw = priorsNNw,
-        param = param,
-        nchains = nchains,
-        autodiff = autodiff,
-        Kernel = Kernel,
-        Adaptorkwargs = Adaptorkwargs,
-        Integratorkwargs = Integratorkwargs,
-        MCMCkwargs = MCMCkwargs,
-        progress = progress,
-        verbose = verbose,
-        estim_collocate = estim_collocate)
+    mcmcchain, samples, statistics = ahmc_bayesian_pinn_ode(
+        prob, chain; strategy, alg.dataset, alg.draw_samples, alg.init_params,
+        alg.physdt, alg.l2std, alg.phystd, alg.priorsNNw, param, alg.nchains, alg.autodiff,
+        Kernel = alg.kernel, alg.Adaptorkwargs, alg.Integratorkwargs,
+        alg.MCMCkwargs, alg.progress, alg.verbose, alg.estim_collocate)
 
     fullsolution = BPINNstats(mcmcchain, samples, statistics)
     ninv = length(param)
     t = collect(eltype(saveat), prob.tspan[1]:saveat:prob.tspan[2])
 
-    if chain isa Lux.AbstractExplicitLayer
-        θinit, st = Lux.setup(Random.default_rng(), chain)
-        θ = [vector_to_parameters(samples[i][1:(end - ninv)], θinit)
-             for i in 1:max(draw_samples - draw_samples ÷ 10, draw_samples - 1000)]
+    θinit, st = LuxCore.setup(Random.default_rng(), chain)
+    θ = [vector_to_parameters(samples[i][1:(end - ninv)], θinit)
+         for i in 1:max(draw_samples - draw_samples ÷ 10, draw_samples - 1000)]
 
-        luxar = [chain(t', θ[i], st)[1] for i in 1:numensemble]
-        # only need for size
-        θinit = collect(ComponentArrays.ComponentArray(θinit))
-    else
-        throw(error("Only Lux.AbstractExplicitLayer neural networks are supported"))
-    end
+    luxar = [chain(t', θ[i], st)[1] for i in 1:numensemble]
+    # only need for size
+    θinit = collect(ComponentArray(θinit))
 
     # constructing ensemble predictions
     ensemblecurves = Vector{}[]
@@ -277,5 +225,5 @@ function SciMLBase.__solve(prob::SciMLBase.ODEProblem,
                             for i in (nnparams + 1):(nnparams + ninv)]
     end
 
-    BPINNsolution(fullsolution, ensemblecurves, estimnnparams, estimated_params, t)
+    return BPINNsolution(fullsolution, ensemblecurves, estimnnparams, estimated_params, t)
 end
diff --git a/src/NeuralPDE.jl b/src/NeuralPDE.jl
index a2ffc2370a..c0798c6270 100644
--- a/src/NeuralPDE.jl
+++ b/src/NeuralPDE.jl
@@ -1,38 +1,58 @@
-"""
-$(DocStringExtensions.README)
-"""
 module NeuralPDE
 
-using DocStringExtensions
-using Reexport, Statistics
-@reexport using SciMLBase
-@reexport using ModelingToolkit
-
-using Zygote, ForwardDiff, Random, Distributions
-using Adapt, DiffEqNoiseProcess
-using Optimization
-using OptimizationOptimisers
-using Integrals, Cubature
-using QuasiMonteCarlo: LatinHypercubeSample
-import QuasiMonteCarlo
-using RuntimeGeneratedFunctions
-using Statistics
-using ArrayInterface
-import Optim
-using Symbolics: wrap, unwrap, arguments, operation
-using SymbolicUtils
-using AdvancedHMC, LogDensityProblems, LinearAlgebra, Functors, MCMCChains
-using MonteCarloMeasurements: Particles
-using ModelingToolkit: value, nameof, toexpr, build_expr, expand_derivatives, Interval,
-                       infimum, supremum
-import DomainSets
-using DomainSets: Domain, ClosedInterval, AbstractInterval, leftendpoint, rightendpoint,
-                  ProductDomain
-using SciMLBase: @add_kwonly, parameterless_type
-using UnPack: @unpack
-import ChainRulesCore, Lux, ComponentArrays
+using ADTypes: ADTypes, AutoForwardDiff, AutoZygote
+using Adapt: Adapt
+using ArrayInterface: ArrayInterface
+using ChainRulesCore: ChainRulesCore, @non_differentiable, @ignore_derivatives
+using Cubature: Cubature
+using ComponentArrays: ComponentArrays, ComponentArray, getdata, getaxes
+using ConcreteStructs: @concrete
+using DocStringExtensions: FIELDS
+using DomainSets: DomainSets, AbstractInterval, leftendpoint, rightendpoint, ProductDomain
+using ForwardDiff: ForwardDiff
+using Functors: Functors, fmap
+using Integrals: Integrals, CubatureJLh, QuadGKJL
+using IntervalSets: infimum, supremum
+using LinearAlgebra: Diagonal
+using Lux: Lux, Chain, Dense, SkipConnection, StatefulLuxLayer
 using Lux: FromFluxAdaptor, recursive_eltype
-using ChainRulesCore: @non_differentiable
+using LuxCore: LuxCore, AbstractLuxLayer, AbstractLuxWrapperLayer
+using MLDataDevices: CPUDevice, get_device
+using Optimisers: Optimisers, Adam
+using Optimization: Optimization
+using OptimizationOptimisers: OptimizationOptimisers
+using Printf: @printf
+using Random: Random, AbstractRNG
+using RecursiveArrayTools: DiffEqArray
+using Reexport: @reexport
+using RuntimeGeneratedFunctions: RuntimeGeneratedFunctions, @RuntimeGeneratedFunction
+using SciMLBase: SciMLBase, BatchIntegralFunction, IntegralProblem, NoiseProblem,
+                 OptimizationFunction, OptimizationProblem, ReturnCode, discretize,
+                 isinplace, solve, symbolic_discretize
+using Statistics: Statistics, mean
+using QuasiMonteCarlo: QuasiMonteCarlo, LatinHypercubeSample
+using WeightInitializers: glorot_uniform, zeros32
+using Zygote: Zygote
+
+# Symbolic Stuff
+using ModelingToolkit: ModelingToolkit, PDESystem, Differential, toexpr
+using Symbolics: Symbolics, unwrap, arguments, operation, build_expr, Num,
+                 expand_derivatives
+using SymbolicUtils: SymbolicUtils
+using SymbolicIndexingInterface: SymbolicIndexingInterface
+
+# Needed for the Bayesian Stuff
+using AdvancedHMC: AdvancedHMC, DiagEuclideanMetric, HMC, HMCDA, Hamiltonian,
+                   JitteredLeapfrog, Leapfrog, MassMatrixAdaptor, NUTS, StanHMCAdaptor,
+                   StepSizeAdaptor, TemperedLeapfrog, find_good_stepsize
+using Distributions: Distributions, Distribution, MvNormal, Normal, dim, logpdf
+using LogDensityProblems: LogDensityProblems
+using MCMCChains: MCMCChains, Chains, sample
+using MonteCarloMeasurements: Particles
+
+import LuxCore: initialparameters, initialstates, parameterlength
+
+@reexport using SciMLBase, ModelingToolkit
 
 RuntimeGeneratedFunctions.init(@__MODULE__)
 
@@ -40,32 +60,54 @@ abstract type AbstractPINN end
 
 abstract type AbstractTrainingStrategy end
 
+const cdev = CPUDevice()
+
+@inline safe_get_device(x) = safe_get_device(get_device(x), x)
+@inline safe_get_device(::Nothing, x) = cdev
+@inline safe_get_device(dev, _) = dev
+
+@inline safe_expand(dev, x) = dev(x)
+@inline safe_expand(::CPUDevice, x::AbstractRange) = x
+@inline safe_collect(dev, x::AbstractRange) = dev(collect(x))
+
+include("eltype_matching.jl")
+
 include("pinn_types.jl")
 include("symbolic_utilities.jl")
 include("training_strategies.jl")
 include("adaptive_losses.jl")
+
 include("ode_solve.jl")
-# include("rode_solve.jl")
 include("dae_solve.jl")
+
 include("transform_inf_integral.jl")
 include("discretize.jl")
+
 include("neural_adapter.jl")
 include("advancedHMC_MCMC.jl")
 include("BPINN_ode.jl")
 include("PDE_BPINN.jl")
+
 include("dgm.jl")
 
-export NNODE, NNDAE,
-       PhysicsInformedNN, discretize,
-       GridTraining, StochasticTraining, QuadratureTraining, QuasiRandomTraining,
-       WeightedIntervalTraining,
-       build_loss_function, get_loss_function,
+export NNODE, NNDAE
+export BNNODE, ahmc_bayesian_pinn_ode, ahmc_bayesian_pinn_pde
+export PhysicsInformedNN, discretize
+export BPINNsolution, BayesianPINN
+export DeepGalerkin
+
+export neural_adapter
+
+export GridTraining, StochasticTraining, QuadratureTraining, QuasiRandomTraining,
+       WeightedIntervalTraining
+
+export build_loss_function, get_loss_function,
        generate_training_sets, get_variables, get_argument, get_bounds,
-       get_numeric_integral, symbolic_discretize,
-       AbstractAdaptiveLoss, NonAdaptiveLoss, GradientScaleAdaptiveLoss,
-       MiniMaxAdaptiveLoss, LogOptions,
-       ahmc_bayesian_pinn_ode, BNNODE, ahmc_bayesian_pinn_pde, vector_to_parameters,
-       BPINNsolution, BayesianPINN,
-       DeepGalerkin
+       get_numeric_integral, symbolic_discretize, vector_to_parameters
+
+export AbstractAdaptiveLoss, NonAdaptiveLoss, GradientScaleAdaptiveLoss,
+       MiniMaxAdaptiveLoss
+
+export LogOptions
 
 end # module
diff --git a/src/PDE_BPINN.jl b/src/PDE_BPINN.jl
index 0bf18c4f0e..c57bcd71cb 100644
--- a/src/PDE_BPINN.jl
+++ b/src/PDE_BPINN.jl
@@ -1,78 +1,26 @@
-mutable struct PDELogTargetDensity{
-    ST <: AbstractTrainingStrategy,
-    D <: Union{Nothing, Vector{<:Matrix{<:Real}}},
-    P <: Vector{<:Distribution},
-    I,
-    F,
-    PH
-}
-    dim::Int64
-    strategy::ST
-    dataset::D
-    priors::P
+@concrete struct PDELogTargetDensity
+    dim::Int
+    strategy <: AbstractTrainingStrategy
+    dataset <: Union{Nothing, Vector{<:Matrix{<:Real}}}
+    priors <: Vector{<:Distribution}
     allstd::Vector{Vector{Float64}}
     names::Tuple
     extraparams::Int
-    init_params::I
-    full_loglikelihood::F
-    Φ::PH
-
-    function PDELogTargetDensity(dim, strategy, dataset,
-            priors, allstd, names, extraparams,
-            init_params::AbstractVector, full_loglikelihood, Φ)
-        new{
-            typeof(strategy),
-            typeof(dataset),
-            typeof(priors),
-            typeof(init_params),
-            typeof(full_loglikelihood),
-            typeof(Φ)
-        }(dim,
-            strategy,
-            dataset,
-            priors,
-            allstd,
-            names,
-            extraparams,
-            init_params,
-            full_loglikelihood,
-            Φ)
-    end
-    function PDELogTargetDensity(dim, strategy, dataset,
-            priors, allstd, names, extraparams,
-            init_params::Union{NamedTuple, ComponentArrays.ComponentVector},
-            full_loglikelihood, Φ)
-        new{
-            typeof(strategy),
-            typeof(dataset),
-            typeof(priors),
-            typeof(init_params),
-            typeof(full_loglikelihood),
-            typeof(Φ)
-        }(dim,
-            strategy,
-            dataset,
-            priors,
-            allstd,
-            names,
-            extraparams,
-            init_params,
-            full_loglikelihood,
-            Φ)
-    end
+    init_params <: Union{AbstractVector, NamedTuple, ComponentArray}
+    full_loglikelihood
+    Φ
 end
 
-function LogDensityProblems.logdensity(Tar::PDELogTargetDensity, θ)
+function LogDensityProblems.logdensity(ltd::PDELogTargetDensity, θ)
     # for parameter estimation neccesarry to use multioutput case
-    return Tar.full_loglikelihood(setparameters(Tar, θ),
-               Tar.allstd) + priorlogpdf(Tar, θ) + L2LossData(Tar, θ)
-    # + L2loss2(Tar, θ)
+    return ltd.full_loglikelihood(setparameters(ltd, θ), ltd.allstd) + priorlogpdf(ltd, θ) +
+           L2LossData(ltd, θ)
 end
 
-function setparameters(Tar::PDELogTargetDensity, θ)
-    names = Tar.names
-    ps_new = θ[1:(end - Tar.extraparams)]
-    ps = Tar.init_params
+@views function setparameters(ltd::PDELogTargetDensity, θ)
+    names = ltd.names
+    ps_new = θ[1:(end - ltd.extraparams)]
+    ps = ltd.init_params
 
     # multioutput case for Lux chains, for each depvar ps would contain Lux ComponentVectors
     # which we use for mapping current ahmc sampled vector of parameters onto NNs
@@ -80,81 +28,68 @@ function setparameters(Tar::PDELogTargetDensity, θ)
     Luxparams = [vector_to_parameters(ps_new[((i += length(ps[x])) - length(ps[x]) + 1):i],
                      ps[x]) for x in names]
 
-    a = ComponentArrays.ComponentArray(NamedTuple{Tar.names}(i for i in Luxparams))
+    a = ComponentArray(NamedTuple{ltd.names}(i for i in Luxparams))
 
-    if Tar.extraparams > 0
-        b = θ[(end - Tar.extraparams + 1):end]
-        return ComponentArrays.ComponentArray(;
-            depvar = a,
-            p = b)
+    if ltd.extraparams > 0
+        return ComponentArray(; depvar = a, p = θ[(end - ltd.extraparams + 1):end])
     else
-        return ComponentArrays.ComponentArray(;
-            depvar = a)
+        return ComponentArray(; depvar = a)
     end
 end
 
-LogDensityProblems.dimension(Tar::PDELogTargetDensity) = Tar.dim
+LogDensityProblems.dimension(ltd::PDELogTargetDensity) = ltd.dim
 
 function LogDensityProblems.capabilities(::PDELogTargetDensity)
     LogDensityProblems.LogDensityOrder{1}()
 end
 
 # L2 losses loglikelihood(needed mainly for ODE parameter estimation)
-function L2LossData(Tar::PDELogTargetDensity, θ)
-    Φ = Tar.Φ
-    init_params = Tar.init_params
-    dataset = Tar.dataset
-    sumt = 0
-    L2stds = Tar.allstd[3]
+function L2LossData(ltd::PDELogTargetDensity, θ)
+    Φ = ltd.Φ
+    init_params = ltd.init_params
+    dataset = ltd.dataset
+    L2stds = ltd.allstd[3]
     # each dep var has a diff dataset depending on its indep var and their domains
     # these datasets are matrices of first col-dep var and remaining cols-all indep var
-    # Tar.init_params is needed to construct a vector of parameters into a ComponentVector
+    # ltd.init_params is needed to construct a vector of parameters into a ComponentVector
 
     # dataset of form Vector[matrix_x, matrix_y, matrix_z]
     # matrix_i is of form [i,indvar1,indvar2,..] (needed in case if heterogenous domains)
 
     # Phi is the trial solution for each NN in chain array
     # Creating logpdf( MvNormal(Phi(t,θ),std), dataset[i] )
-    # dataset[i][:, 2:end] -> indepvar cols of a particular depvar's dataset 
+    # dataset[i][:, 2:end] -> indepvar cols of a particular depvar's dataset
     # dataset[i][:, 1] -> depvar col of depvar's dataset
 
-    if Tar.extraparams > 0
-        for i in eachindex(Φ)
-            sumt += logpdf(
-                MvNormal(
-                    Φ[i](dataset[i][:, 2:end]',
-                        vector_to_parameters(θ[1:(end - Tar.extraparams)],
-                            init_params)[Tar.names[i]])[1,
-                        :],
-                    LinearAlgebra.Diagonal(abs2.(ones(size(dataset[i])[1]) .*
-                                                 L2stds[i]))),
-                dataset[i][:, 1])
-        end
-        return sumt
+    ltd.extraparams ≤ 0 && return false
+
+    sumt = 0
+    for i in eachindex(Φ)
+        sumt += logpdf(
+            MvNormal(
+                Φ[i](dataset[i][:, 2:end]',
+                    vector_to_parameters(θ[1:(end - ltd.extraparams)], init_params)[ltd.names[i]])[
+                    1, :],
+                Diagonal(abs2.(ones(size(dataset[i])[1]) .* L2stds[i]))),
+            dataset[i][:, 1])
     end
-    return 0
+    return sumt
 end
 
 # priors for NN parameters + ODE constants
-function priorlogpdf(Tar::PDELogTargetDensity, θ)
-    allparams = Tar.priors
+function priorlogpdf(ltd::PDELogTargetDensity, θ)
+    allparams = ltd.priors
     # Vector of ode parameters priors
     invpriors = allparams[2:end]
-
-    # nn weights
     nnwparams = allparams[1]
 
-    if Tar.extraparams > 0
-        invlogpdf = sum(
-            logpdf(invpriors[length(θ) - i + 1], θ[i])
-            for i in (length(θ) - Tar.extraparams + 1):length(θ);
-            init = 0.0)
+    ltd.extraparams ≤ 0 && return logpdf(nnwparams, θ)
 
-        return (invlogpdf
-                +
-                logpdf(nnwparams, θ[1:(length(θ) - Tar.extraparams)]))
+    invlogpdf = sum((length(θ) - ltd.extraparams + 1):length(θ)) do i
+        logpdf(invpriors[length(θ) - i + 1], θ[i])
     end
-    return logpdf(nnwparams, θ)
+
+    return invlogpdf + logpdf(nnwparams, θ[1:(length(θ) - ltd.extraparams)])
 end
 
 function integratorchoice(Integratorkwargs, initial_ϵ)
@@ -244,54 +179,63 @@ end
 
 """
     ahmc_bayesian_pinn_pde(pde_system, discretization;
-            draw_samples = 1000,
-            bcstd = [0.01], l2std = [0.05],
-            phystd = [0.05], priorsNNw = (0.0, 2.0),
-            param = [], nchains = 1, Kernel = HMC(0.1, 30),
-            Adaptorkwargs = (Adaptor = StanHMCAdaptor,
-                Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
-            Integratorkwargs = (Integrator = Leapfrog,), saveats = [1 / 10.0],
-            numensemble = floor(Int, draw_samples / 3), progress = false, verbose = false)               
+        draw_samples = 1000, bcstd = [0.01], l2std = [0.05], phystd = [0.05],
+        priorsNNw = (0.0, 2.0), param = [], nchains = 1, Kernel = HMC(0.1, 30),
+        Adaptorkwargs = (Adaptor = StanHMCAdaptor,
+            Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
+        Integratorkwargs = (Integrator = Leapfrog,), saveats = [1 / 10.0],
+        numensemble = floor(Int, draw_samples / 3), progress = false, verbose = false)
 
 ## NOTES
 
 * Dataset is required for accurate Parameter estimation + solving equations.
-* Returned solution is a BPINNsolution consisting of Ensemble solution, estimated PDE and NN parameters
-  for chosen `saveats` grid spacing and last n = `numensemble` samples in Chain. the complete set of samples
-  in the MCMC chain is returned as `fullsolution`,  refer `BPINNsolution` for more details.
+* Returned solution is a BPINNsolution consisting of Ensemble solution, estimated PDE and NN
+  parameters for chosen `saveats` grid spacing and last n = `numensemble` samples in Chain.
+  the complete set of samples in the MCMC chain is returned as `fullsolution`,  refer
+  `BPINNsolution` for more details.
 
 ## Positional Arguments
 
 * `pde_system`: ModelingToolkit defined PDE equation or system of equations.
-* `discretization`: BayesianPINN discretization for the given pde_system, Neural Network and training strategy.
+* `discretization`: BayesianPINN discretization for the given pde_system, Neural Network and
+  training strategy.
 
 ## Keyword Arguments
 
-* `draw_samples`: number of samples to be drawn in the MCMC algorithms (warmup samples are ~2/3 of draw samples)
-* `bcstd`: Vector of standard deviations of BPINN prediction against Initial/Boundary Condition equations.
-* `l2std`: Vector of standard deviations of BPINN prediction against L2 losses/Dataset for each dependant variable of interest.
-* `phystd`: Vector of standard deviations of BPINN prediction against Chosen Underlying PDE equations.
-* `priorsNNw`: Tuple of (mean, std) for BPINN Network parameters. Weights and Biases of BPINN are Normal Distributions by default.
+* `draw_samples`: number of samples to be drawn in the MCMC algorithms (warmup samples are
+  ~2/3 of draw samples)
+* `bcstd`: Vector of standard deviations of BPINN prediction against Initial/Boundary
+  Condition equations.
+* `l2std`: Vector of standard deviations of BPINN prediction against L2 losses/Dataset for
+  each dependant variable of interest.
+* `phystd`: Vector of standard deviations of BPINN prediction against Chosen Underlying PDE
+  equations.
+* `priorsNNw`: Tuple of (mean, std) for BPINN Network parameters. Weights and Biases of
+  BPINN are Normal Distributions by default.
 * `param`: Vector of chosen PDE's parameter's Distributions in case of Inverse problems.
 * `nchains`: number of chains you want to sample.
-* `Kernel`: Choice of MCMC Sampling Algorithm object HMC/NUTS/HMCDA (AdvancedHMC.jl implementations).
-* `Adaptorkwargs`: `Adaptor`, `Metric`, `targetacceptancerate`. Refer: https://turinglang.org/AdvancedHMC.jl/stable/
-   Note: Target percentage(in decimal) of iterations in which the proposals are accepted (0.8 by default).
-* `Integratorkwargs`: `Integrator`, `jitter_rate`, `tempering_rate`. Refer: https://turinglang.org/AdvancedHMC.jl/stable/
-* `saveats`: Grid spacing for each independent variable for evaluation of ensemble solution, estimated parameters.
-* `numensemble`: Number of last samples to take for creation of ensemble solution, estimated parameters.
+* `Kernel`: Choice of MCMC Sampling Algorithm object HMC/NUTS/HMCDA (AdvancedHMC.jl
+  implementations).
+* `Adaptorkwargs`: `Adaptor`, `Metric`, `targetacceptancerate`. Refer:
+  https://turinglang.org/AdvancedHMC.jl/stable/. Note: Target percentage(in decimal) of
+  iterations in which the proposals are accepted (0.8 by default).
+* `Integratorkwargs`: `Integrator`, `jitter_rate`, `tempering_rate`. Refer:
+  https://turinglang.org/AdvancedHMC.jl/stable/
+* `saveats`: Grid spacing for each independent variable for evaluation of ensemble solution,
+  estimated parameters.
+* `numensemble`: Number of last samples to take for creation of ensemble solution, estimated
+  parameters.
 * `progress`: controls whether to show the progress meter or not.
 * `verbose`: controls the verbosity. (Sample call args in AHMC).
 
-## Warnings
+!!! warning
 
-* AdvancedHMC.jl is still developing convenience structs so might need changes on new releases.
+    AdvancedHMC.jl is still developing convenience structs so might need changes on new
+    releases.
 """
 function ahmc_bayesian_pinn_pde(pde_system, discretization;
-        draw_samples = 1000,
-        bcstd = [0.01], l2std = [0.05],
-        phystd = [0.05], priorsNNw = (0.0, 2.0),
-        param = [], nchains = 1, Kernel = HMC(0.1, 30),
+        draw_samples = 1000, bcstd = [0.01], l2std = [0.05], phystd = [0.05],
+        priorsNNw = (0.0, 2.0), param = [], nchains = 1, Kernel = HMC(0.1, 30),
         Adaptorkwargs = (Adaptor = StanHMCAdaptor,
             Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
         Integratorkwargs = (Integrator = Leapfrog,), saveats = [1 / 10.0],
@@ -314,7 +258,7 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
     elseif discretization.param_estim && dataset isa Nothing
         throw(UndefVarError(:dataset))
     elseif discretization.param_estim && length(l2std) != length(pinnrep.depvars)
-        throw(error("L2 stds length must match number of dependant variables"))
+        error("L2 stds length must match number of dependant variables")
     end
 
     # for physics loglikelihood
@@ -322,18 +266,13 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
     chain = discretization.chain
 
     if length(pinnrep.domains) != length(saveats)
-        throw(error("Number of independent variables must match saveat inference discretization steps"))
+        error("Number of independent variables must match saveat inference discretization steps")
     end
 
     # NN solutions for loglikelihood which is used for L2lossdata
     Φ = pinnrep.phi
 
-    # for new L2 loss
-    # discretization.additional_loss = 
-
-    if nchains < 1
-        throw(error("number of chains must be greater than or equal to 1"))
-    end
+    @assert nchains≥1 "number of chains must be greater than or equal to 1"
 
     # remove inv params take only NN params, AHMC uses Float64
     initial_nnθ = pinnrep.flat_init_params[1:(end - length(param))]
@@ -350,13 +289,13 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
     # add init_params for NN params
     priors = [
         MvNormal(priorsNNw[1] * ones(nparameters),
-        LinearAlgebra.Diagonal(abs2.(priorsNNw[2] .* ones(nparameters))))
+        Diagonal(abs2.(priorsNNw[2] .* ones(nparameters))))
     ]
 
     # append Ode params to all paramvector - initial_θ
     if ninv > 0
         # shift ode params(initialise ode params by prior means)
-        # check if means or user speified is better
+        # check if means or user specified is better
         initial_θ = vcat(initial_θ, [Distributions.params(param[i])[1] for i in 1:ninv])
         priors = vcat(priors, param)
         nparameters += ninv
@@ -365,17 +304,10 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
     # vector in case of N-dimensional domains
     strategy = discretization.strategy
 
-    # dimensions would be total no of params,initial_nnθ for Lux namedTuples 
-    ℓπ = PDELogTargetDensity(nparameters,
-        strategy,
-        dataset,
-        priors,
-        [phystd, bcstd, l2std],
-        names,
-        ninv,
-        initial_nnθ,
-        full_weighted_loglikelihood,
-        Φ)
+    # dimensions would be total no of params,initial_nnθ for Lux namedTuples
+    ℓπ = PDELogTargetDensity(
+        nparameters, strategy, dataset, priors, [phystd, bcstd, l2std],
+        names, ninv, initial_nnθ, full_weighted_loglikelihood, Φ)
 
     Adaptor, Metric, targetacceptancerate = Adaptorkwargs[:Adaptor],
     Adaptorkwargs[:Metric], Adaptorkwargs[:targetacceptancerate]
@@ -384,11 +316,13 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
     metric = Metric(nparameters)
     hamiltonian = Hamiltonian(metric, ℓπ, ForwardDiff)
 
-    @info("Current Physics Log-likelihood : ",
-        ℓπ.full_loglikelihood(setparameters(ℓπ, initial_θ),
-            ℓπ.allstd))
-    @info("Current Prior Log-likelihood : ", priorlogpdf(ℓπ, initial_θ))
-    @info("Current MSE against dataset Log-likelihood : ", L2LossData(ℓπ, initial_θ))
+    if verbose
+        @printf("Current Physics Log-likelihood : %g\n",
+            ℓπ.full_loglikelihood(setparameters(ℓπ, initial_θ), ℓπ.allstd))
+        @printf("Current Prior Log-likelihood : %g\n", priorlogpdf(ℓπ, initial_θ))
+        @printf("Current MSE against dataset Log-likelihood : %g\n",
+            L2LossData(ℓπ, initial_θ))
+    end
 
     # parallel sampling option
     if nchains != 1
@@ -414,17 +348,10 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
 
             fullsolution = BPINNstats(mcmc_chain, samples, stats)
             ensemblecurves, estimnnparams, estimated_params, timepoints = inference(
-                samples,
-                pinnrep,
-                saveat,
-                numensemble,
-                ℓπ)
-
-            bpinnsols[i] = BPINNsolution(fullsolution,
-                ensemblecurves,
-                estimnnparams,
-                estimated_params,
-                timepoints)
+                samples, pinnrep, saveat, numensemble, ℓπ)
+
+            bpinnsols[i] = BPINNsolution(
+                fullsolution, ensemblecurves, estimnnparams, estimated_params, timepoints)
         end
         return bpinnsols
     else
@@ -441,25 +368,20 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
         matrix_samples = hcat(samples...)
         mcmc_chain = MCMCChains.Chains(matrix_samples')
 
-        @info("Sampling Complete.")
-        @info("Current Physics Log-likelihood : ",
-            ℓπ.full_loglikelihood(setparameters(ℓπ, samples[end]),
-                ℓπ.allstd))
-        @info("Current Prior Log-likelihood : ", priorlogpdf(ℓπ, samples[end]))
-        @info("Current MSE against dataset Log-likelihood : ",
-            L2LossData(ℓπ, samples[end]))
+        if verbose
+            @printf("Sampling Complete.\n")
+            @printf("Current Physics Log-likelihood : %g\n",
+                ℓπ.full_loglikelihood(setparameters(ℓπ, samples[end]), ℓπ.allstd))
+            @printf("Current Prior Log-likelihood : %g\n", priorlogpdf(ℓπ, samples[end]))
+            @printf("Current MSE against dataset Log-likelihood : %g\n",
+                L2LossData(ℓπ, samples[end]))
+        end
 
         fullsolution = BPINNstats(mcmc_chain, samples, stats)
         ensemblecurves, estimnnparams, estimated_params, timepoints = inference(samples,
-            pinnrep,
-            saveats,
-            numensemble,
-            ℓπ)
-
-        return BPINNsolution(fullsolution,
-            ensemblecurves,
-            estimnnparams,
-            estimated_params,
-            timepoints)
+            pinnrep, saveats, numensemble, ℓπ)
+
+        return BPINNsolution(
+            fullsolution, ensemblecurves, estimnnparams, estimated_params, timepoints)
     end
 end
diff --git a/src/adaptive_losses.jl b/src/adaptive_losses.jl
index ca949ec451..f55dded889 100644
--- a/src/adaptive_losses.jl
+++ b/src/adaptive_losses.jl
@@ -1,14 +1,8 @@
 abstract type AbstractAdaptiveLoss end
 
 # Utils
-function vectorify(x, t::Type{T}) where {T <: Real}
-    convertfunc(y) = convert(t, y)
-    returnval = if x isa Vector
-        convertfunc.(x)
-    else
-        t[convertfunc(x)]
-    end
-end
+vectorify(x::Vector, ::Type{T}) where {T <: Real} = T.(x)
+vectorify(x, ::Type{T}) where {T <: Real} = T[convert(T, x)]
 
 # Dispatches
 """
@@ -19,47 +13,35 @@ end
 A way of loss weighting the components of the loss function in the total sum that does not
 change during optimization
 """
-mutable struct NonAdaptiveLoss{T <: Real} <: AbstractAdaptiveLoss
+@concrete mutable struct NonAdaptiveLoss{T <: Real} <: AbstractAdaptiveLoss
     pde_loss_weights::Vector{T}
     bc_loss_weights::Vector{T}
     additional_loss_weights::Vector{T}
-    SciMLBase.@add_kwonly function NonAdaptiveLoss{T}(; pde_loss_weights = 1.0,
-            bc_loss_weights = 1.0,
-            additional_loss_weights = 1.0) where {
-            T <:
-            Real
-    }
-        new(vectorify(pde_loss_weights, T), vectorify(bc_loss_weights, T),
-            vectorify(additional_loss_weights, T))
-    end
 end
 
-# default to Float64
-SciMLBase.@add_kwonly function NonAdaptiveLoss(;
-        pde_loss_weights = 1.0, bc_loss_weights = 1.0,
-        additional_loss_weights = 1.0)
-    NonAdaptiveLoss{Float64}(; pde_loss_weights = pde_loss_weights,
-        bc_loss_weights = bc_loss_weights,
-        additional_loss_weights = additional_loss_weights)
+function NonAdaptiveLoss{T}(; pde_loss_weights = 1.0, bc_loss_weights = 1.0,
+        additional_loss_weights = 1.0) where {T <: Real}
+    return NonAdaptiveLoss{T}(
+        vectorify(pde_loss_weights, T), vectorify(bc_loss_weights, T),
+        vectorify(additional_loss_weights, T))
 end
 
-function generate_adaptive_loss_function(pinnrep::PINNRepresentation,
-        adaloss::NonAdaptiveLoss,
-        pde_loss_functions, bc_loss_functions)
-    function null_nonadaptive_loss(θ, pde_losses, bc_losses)
-        nothing
-    end
+NonAdaptiveLoss(; kwargs...) = NonAdaptiveLoss{Float64}(; kwargs...)
+
+function generate_adaptive_loss_function(::PINNRepresentation, ::NonAdaptiveLoss, _, __)
+    return Returns(nothing)
 end
 
 """
     GradientScaleAdaptiveLoss(reweight_every;
-                            weight_change_inertia = 0.9,
-                            pde_loss_weights = 1.0,
-                            bc_loss_weights = 1.0,
-                            additional_loss_weights = 1.0)
+                              weight_change_inertia = 0.9,
+                              pde_loss_weights = 1.0,
+                              bc_loss_weights = 1.0,
+                              additional_loss_weights = 1.0)
 
 A way of adaptively reweighting the components of the loss function in the total sum such
-that BC_i loss weights are scaled by the exponential moving average of max(|∇pde_loss|) / mean(|∇bc_i_loss|)).
+that BC_i loss weights are scaled by the exponential moving average of
+max(|∇pde_loss|) / mean(|∇bc_i_loss|)).
 
 ## Positional Arguments
 
@@ -81,56 +63,43 @@ https://arxiv.org/abs/2001.04536v1
 With code reference:
 https://github.com/PredictiveIntelligenceLab/GradientPathologiesPINNs
 """
-mutable struct GradientScaleAdaptiveLoss{T <: Real} <: AbstractAdaptiveLoss
-    reweight_every::Int64
+@concrete mutable struct GradientScaleAdaptiveLoss{T <: Real} <: AbstractAdaptiveLoss
+    reweight_every::Int
     weight_change_inertia::T
     pde_loss_weights::Vector{T}
     bc_loss_weights::Vector{T}
     additional_loss_weights::Vector{T}
-    SciMLBase.@add_kwonly function GradientScaleAdaptiveLoss{T}(reweight_every;
-            weight_change_inertia = 0.9,
-            pde_loss_weights = 1.0,
-            bc_loss_weights = 1.0,
-            additional_loss_weights = 1.0) where {
-            T <:
-            Real
-    }
-        new(convert(Int64, reweight_every), convert(T, weight_change_inertia),
-            vectorify(pde_loss_weights, T), vectorify(bc_loss_weights, T),
-            vectorify(additional_loss_weights, T))
-    end
 end
-# default to Float64
-SciMLBase.@add_kwonly function GradientScaleAdaptiveLoss(reweight_every;
-        weight_change_inertia = 0.9,
-        pde_loss_weights = 1.0,
-        bc_loss_weights = 1.0,
-        additional_loss_weights = 1.0)
-    GradientScaleAdaptiveLoss{Float64}(reweight_every;
-        weight_change_inertia = weight_change_inertia,
-        pde_loss_weights = pde_loss_weights,
-        bc_loss_weights = bc_loss_weights,
-        additional_loss_weights = additional_loss_weights)
+
+function GradientScaleAdaptiveLoss{T}(reweight_every::Int;
+        weight_change_inertia = 0.9, pde_loss_weights = 1.0,
+        bc_loss_weights = 1.0, additional_loss_weights = 1.0) where {T <: Real}
+    return GradientScaleAdaptiveLoss{T}(reweight_every, weight_change_inertia,
+        vectorify(pde_loss_weights, T), vectorify(bc_loss_weights, T),
+        vectorify(additional_loss_weights, T))
+end
+
+function GradientScaleAdaptiveLoss(args...; kwargs...)
+    return GradientScaleAdaptiveLoss{Float64}(args...; kwargs...)
 end
 
 function generate_adaptive_loss_function(pinnrep::PINNRepresentation,
-        adaloss::GradientScaleAdaptiveLoss,
-        pde_loss_functions, bc_loss_functions)
+        adaloss::GradientScaleAdaptiveLoss, pde_loss_functions, bc_loss_functions)
     weight_change_inertia = adaloss.weight_change_inertia
     iteration = pinnrep.iteration
     adaloss_T = eltype(adaloss.pde_loss_weights)
 
-    function run_loss_gradients_adaptive_loss(θ, pde_losses, bc_losses)
-        if iteration[1] % adaloss.reweight_every == 0
-            # the paper assumes a single pde loss function, so here we grab the maximum of the maximums of each pde loss function
-            pde_grads_maxes = [maximum(abs.(Zygote.gradient(pde_loss_function, θ)[1]))
+    return (θ, pde_losses, bc_losses) -> begin
+        if iteration[] % adaloss.reweight_every == 0
+            # the paper assumes a single pde loss function, so here we grab the maximum of
+            # the maximums of each pde loss function
+            pde_grads_maxes = [maximum(abs, only(Zygote.gradient(pde_loss_function, θ)))
                                for pde_loss_function in pde_loss_functions]
             pde_grads_max = maximum(pde_grads_maxes)
-            bc_grads_mean = [mean(abs.(Zygote.gradient(bc_loss_function, θ)[1]))
+            bc_grads_mean = [mean(abs, only(Zygote.gradient(bc_loss_function, θ)))
                              for bc_loss_function in bc_loss_functions]
 
-            nonzero_divisor_eps = adaloss_T isa Float64 ? Float64(1e-11) :
-                                  convert(adaloss_T, 1e-7)
+            nonzero_divisor_eps = adaloss_T isa Float64 ? 1e-11 : convert(adaloss_T, 1e-7)
             bc_loss_weights_proposed = pde_grads_max ./
                                        (bc_grads_mean .+ nonzero_divisor_eps)
             adaloss.bc_loss_weights .= weight_change_inertia .*
@@ -138,26 +107,24 @@ function generate_adaptive_loss_function(pinnrep::PINNRepresentation,
                                        (1 .- weight_change_inertia) .*
                                        bc_loss_weights_proposed
             logscalar(pinnrep.logger, pde_grads_max, "adaptive_loss/pde_grad_max",
-                iteration[1])
+                iteration[])
             logvector(pinnrep.logger, pde_grads_maxes, "adaptive_loss/pde_grad_maxes",
-                iteration[1])
+                iteration[])
             logvector(pinnrep.logger, bc_grads_mean, "adaptive_loss/bc_grad_mean",
-                iteration[1])
+                iteration[])
             logvector(pinnrep.logger, adaloss.bc_loss_weights,
-                "adaptive_loss/bc_loss_weights",
-                iteration[1])
+                "adaptive_loss/bc_loss_weights", iteration[])
         end
-        nothing
+        return nothing
     end
 end
 
 """
-    function MiniMaxAdaptiveLoss(reweight_every;
-                                pde_max_optimiser = OptimizationOptimisers.Adam(1e-4),
-                                bc_max_optimiser = OptimizationOptimisers.Adam(0.5),
-                                pde_loss_weights = 1,
-                                bc_loss_weights = 1,
-                                additional_loss_weights = 1)
+    MiniMaxAdaptiveLoss(reweight_every;
+                        pde_max_optimiser = OptimizationOptimisers.Adam(1e-4),
+                        bc_max_optimiser = OptimizationOptimisers.Adam(0.5),
+                        pde_loss_weights = 1, bc_loss_weights = 1,
+                        additional_loss_weights = 1)
 
 A way of adaptively reweighting the components of the loss function in the total sum such
 that the loss weights are maximized by an internal optimizer, which leads to a behavior
@@ -182,74 +149,43 @@ Self-Adaptive Physics-Informed Neural Networks using a Soft Attention Mechanism
 Levi McClenny, Ulisses Braga-Neto
 https://arxiv.org/abs/2009.04544
 """
-mutable struct MiniMaxAdaptiveLoss{T <: Real,
-    PDE_OPT,
-    BC_OPT} <:
-               AbstractAdaptiveLoss
-    reweight_every::Int64
-    pde_max_optimiser::PDE_OPT
-    bc_max_optimiser::BC_OPT
+@concrete mutable struct MiniMaxAdaptiveLoss{T <: Real} <: AbstractAdaptiveLoss
+    reweight_every::Int
+    pde_max_optimiser <: Optimisers.AbstractRule
+    bc_max_optimiser <: Optimisers.AbstractRule
     pde_loss_weights::Vector{T}
     bc_loss_weights::Vector{T}
     additional_loss_weights::Vector{T}
-    SciMLBase.@add_kwonly function MiniMaxAdaptiveLoss{T,
-            PDE_OPT, BC_OPT}(reweight_every;
-            pde_max_optimiser = OptimizationOptimisers.Adam(1e-4),
-            bc_max_optimiser = OptimizationOptimisers.Adam(0.5),
-            pde_loss_weights = 1.0,
-            bc_loss_weights = 1.0,
-            additional_loss_weights = 1.0) where {
-            T <:
-            Real,
-            PDE_OPT,
-            BC_OPT
-    }
-        new(convert(Int64, reweight_every), convert(PDE_OPT, pde_max_optimiser),
-            convert(BC_OPT, bc_max_optimiser),
-            vectorify(pde_loss_weights, T), vectorify(bc_loss_weights, T),
-            vectorify(additional_loss_weights, T))
-    end
 end
 
-# default to Float64, ADAM, ADAM
-SciMLBase.@add_kwonly function MiniMaxAdaptiveLoss(reweight_every;
-        pde_max_optimiser = OptimizationOptimisers.Adam(1e-4),
-        bc_max_optimiser = OptimizationOptimisers.Adam(0.5),
-        pde_loss_weights = 1.0,
-        bc_loss_weights = 1.0,
-        additional_loss_weights = 1.0)
-    MiniMaxAdaptiveLoss{Float64, typeof(pde_max_optimiser),
-        typeof(bc_max_optimiser)}(reweight_every;
-        pde_max_optimiser = pde_max_optimiser,
-        bc_max_optimiser = bc_max_optimiser,
-        pde_loss_weights = pde_loss_weights,
-        bc_loss_weights = bc_loss_weights,
-        additional_loss_weights = additional_loss_weights)
+function MiniMaxAdaptiveLoss{T}(reweight_every::Int; pde_max_optimiser = Adam(1e-4),
+        bc_max_optimiser = Adam(0.5), pde_loss_weights = 1.0, bc_loss_weights = 1.0,
+        additional_loss_weights = 1.0) where {T <: Real}
+    return MiniMaxAdaptiveLoss{T}(reweight_every, pde_max_optimiser, bc_max_optimiser,
+        vectorify(pde_loss_weights, T), vectorify(bc_loss_weights, T),
+        vectorify(additional_loss_weights, T))
 end
 
+MiniMaxAdaptiveLoss(args...; kwargs...) = MiniMaxAdaptiveLoss{Float64}(args...; kwargs...)
+
 function generate_adaptive_loss_function(pinnrep::PINNRepresentation,
-        adaloss::MiniMaxAdaptiveLoss,
-        pde_loss_functions, bc_loss_functions)
-    pde_max_optimiser = adaloss.pde_max_optimiser
-    pde_max_optimiser_setup = OptimizationOptimisers.Optimisers.setup(
-        pde_max_optimiser, adaloss.pde_loss_weights)
-    bc_max_optimiser = adaloss.bc_max_optimiser
-    bc_max_optimiser_setup = OptimizationOptimisers.Optimisers.setup(
-        bc_max_optimiser, adaloss.bc_loss_weights)
+        adaloss::MiniMaxAdaptiveLoss, _, __)
+    pde_max_optimiser_setup = Optimisers.setup(
+        adaloss.pde_max_optimiser, adaloss.pde_loss_weights)
+    bc_max_optimiser_setup = Optimisers.setup(
+        adaloss.bc_max_optimiser, adaloss.bc_loss_weights)
     iteration = pinnrep.iteration
 
-    function run_minimax_adaptive_loss(θ, pde_losses, bc_losses)
-        if iteration[1] % adaloss.reweight_every == 0
-            OptimizationOptimisers.Optimisers.update!(
+    return (θ, pde_losses, bc_losses) -> begin
+        if iteration[] % adaloss.reweight_every == 0
+            Optimisers.update!(
                 pde_max_optimiser_setup, adaloss.pde_loss_weights, -pde_losses)
-            OptimizationOptimisers.Optimisers.update!(
-                bc_max_optimiser_setup, adaloss.bc_loss_weights, -bc_losses)
+            Optimisers.update!(bc_max_optimiser_setup, adaloss.bc_loss_weights, -bc_losses)
             logvector(pinnrep.logger, adaloss.pde_loss_weights,
-                "adaptive_loss/pde_loss_weights", iteration[1])
+                "adaptive_loss/pde_loss_weights", iteration[])
             logvector(pinnrep.logger, adaloss.bc_loss_weights,
-                "adaptive_loss/bc_loss_weights",
-                iteration[1])
+                "adaptive_loss/bc_loss_weights", iteration[])
         end
-        nothing
+        return nothing
     end
 end
diff --git a/src/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl
index 7105346aa0..380d284f55 100644
--- a/src/advancedHMC_MCMC.jl
+++ b/src/advancedHMC_MCMC.jl
@@ -1,69 +1,41 @@
-mutable struct LogTargetDensity{C, S, ST <: AbstractTrainingStrategy, I,
-    P <: Vector{<:Distribution},
-    D <:
-    Union{Vector{Nothing}, Vector{<:Vector{<:AbstractFloat}}}
-}
+@concrete struct LogTargetDensity
     dim::Int
-    prob::SciMLBase.ODEProblem
-    chain::C
-    st::S
-    strategy::ST
-    dataset::D
-    priors::P
+    prob <: SciMLBase.ODEProblem
+    smodel <: StatefulLuxLayer
+    strategy <: AbstractTrainingStrategy
+    dataset <: Union{Vector{Nothing}, Vector{<:Vector{<:AbstractFloat}}}
+    priors <: Vector{<:Distribution}
     phystd::Vector{Float64}
     l2std::Vector{Float64}
     autodiff::Bool
     physdt::Float64
     extraparams::Int
-    init_params::I
+    init_params <: Union{NamedTuple, ComponentArray}
     estim_collocate::Bool
+end
 
-    function LogTargetDensity(dim, prob, chain::Optimisers.Restructure, st, strategy,
-            dataset,
-            priors, phystd, l2std, autodiff, physdt, extraparams,
-            init_params::AbstractVector, estim_collocate)
-        new{
-            typeof(chain),
-            Nothing,
-            typeof(strategy),
-            typeof(init_params),
-            typeof(priors),
-            typeof(dataset)
-        }(dim,
-            prob,
-            chain,
-            nothing, strategy,
-            dataset,
-            priors,
-            phystd,
-            l2std,
-            autodiff,
-            physdt,
-            extraparams,
-            init_params,
-            estim_collocate)
-    end
-    function LogTargetDensity(dim, prob, chain::Lux.AbstractExplicitLayer, st, strategy,
-            dataset,
-            priors, phystd, l2std, autodiff, physdt, extraparams,
-            init_params::NamedTuple, estim_collocate)
-        new{
-            typeof(chain),
-            typeof(st),
-            typeof(strategy),
-            typeof(init_params),
-            typeof(priors),
-            typeof(dataset)
-        }(dim,
-            prob,
-            chain, st, strategy,
-            dataset, priors,
-            phystd, l2std,
-            autodiff,
-            physdt,
-            extraparams,
-            init_params,
-            estim_collocate)
+"""
+NN OUTPUT AT t,θ ~ phi(t,θ).
+"""
+function (f::LogTargetDensity)(t::AbstractVector, θ)
+    θ = vector_to_parameters(θ, f.init_params)
+    dev = safe_get_device(θ)
+    t = safe_expand(dev, t)
+    u0 = f.prob.u0 |> dev
+    return u0 .+ (t' .- f.prob.tspan[1]) .* f.smodel(t', θ)
+end
+
+(f::LogTargetDensity)(t::Number, θ) = f([t], θ)[:, 1]
+
+"""
+Similar to ode_dfdx() in NNODE.
+"""
+function ode_dfdx(phi::LogTargetDensity, t::AbstractVector, θ, autodiff::Bool)
+    if autodiff
+        return ForwardDiff.jacobian(Base.Fix2(phi, θ), t)
+    else
+        ϵ = sqrt(eps(eltype(t)))
+        return (phi(t .+ ϵ, θ) .- phi(t, θ)) ./ ϵ
     end
 end
 
@@ -71,344 +43,239 @@ end
 Function needed for converting vector of sampled parameters into ComponentVector in case of Lux chain output, derivatives
 the sampled parameters are of exotic type `Dual` due to ForwardDiff's autodiff tagging.
 """
-function vector_to_parameters(ps_new::AbstractVector,
-        ps::Union{NamedTuple, ComponentArrays.ComponentVector})
-    @assert length(ps_new) == Lux.parameterlength(ps)
+function vector_to_parameters(ps_new::AbstractVector, ps::Union{NamedTuple, ComponentArray})
+    @assert length(ps_new) == LuxCore.parameterlength(ps)
     i = 1
     function get_ps(x)
         z = reshape(view(ps_new, i:(i + length(x) - 1)), size(x))
         i += length(x)
         return z
     end
-    return Functors.fmap(get_ps, ps)
+    return fmap(get_ps, ps)
 end
 
-vector_to_parameters(ps_new::AbstractVector, ps::AbstractVector) = ps_new
+vector_to_parameters(ps_new::AbstractVector, _::AbstractVector) = ps_new
 
-function LogDensityProblems.logdensity(Tar::LogTargetDensity, θ)
-    if Tar.estim_collocate
-        return physloglikelihood(Tar, θ) + priorweights(Tar, θ) + L2LossData(Tar, θ) +
-               L2loss2(Tar, θ)
-    else
-        return physloglikelihood(Tar, θ) + priorweights(Tar, θ) + L2LossData(Tar, θ)
-    end
+function LogDensityProblems.logdensity(ltd::LogTargetDensity, θ)
+    ldensity = physloglikelihood(ltd, θ) + priorweights(ltd, θ) + L2LossData(ltd, θ)
+    ltd.estim_collocate && return ldensity + L2loss2(ltd, θ)
+    return ldensity
 end
 
-LogDensityProblems.dimension(Tar::LogTargetDensity) = Tar.dim
+LogDensityProblems.dimension(ltd::LogTargetDensity) = ltd.dim
 
 function LogDensityProblems.capabilities(::LogTargetDensity)
-    LogDensityProblems.LogDensityOrder{1}()
+    return LogDensityProblems.LogDensityOrder{1}()
 end
 
 """
 suggested extra loss function for ODE solver case
 """
-function L2loss2(Tar::LogTargetDensity, θ)
-    f = Tar.prob.f
+@views function L2loss2(ltd::LogTargetDensity, θ)
+    ltd.extraparams ≤ 0 && return false  # XXX: type-stability?
 
-    # parameter estimation chosen or not
-    if Tar.extraparams > 0
-        autodiff = Tar.autodiff
-        # Timepoints to enforce Physics 
-        t = Tar.dataset[end]
-        u1 = Tar.dataset[2]
-        û = Tar.dataset[1]
-
-        nnsol = NNodederi(Tar, t, θ[1:(length(θ) - Tar.extraparams)], autodiff)
-
-        ode_params = Tar.extraparams == 1 ?
-                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
-                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
-
-        if length(Tar.prob.u0) == 1
-            physsol = [f(û[i],
-                ode_params,
-                t[i])
-                       for i in 1:length(û[:, 1])]
-        else
-            physsol = [f([û[i], u1[i]],
-                ode_params,
-                t[i])
-                       for i in 1:length(û)]
-        end
-        #form of NN output matrix output dim x n 
-        deri_physsol = reduce(hcat, physsol)
-   
-        physlogprob = 0
-        for i in 1:length(Tar.prob.u0)
-            # can add phystd[i] for u[i] 
-            physlogprob += logpdf(MvNormal(deri_physsol[i, :],
-                    LinearAlgebra.Diagonal(map(abs2,
-                        (Tar.l2std[i] * 4.0) .*
-                        ones(length(nnsol[i, :]))))),
-                nnsol[i, :])
-        end
-        return physlogprob
+    f = ltd.prob.f
+    t = ltd.dataset[end]
+    u1 = ltd.dataset[2]
+    û = ltd.dataset[1]
+
+    nnsol = ode_dfdx(ltd, t, θ[1:(length(θ) - ltd.extraparams)], ltd.autodiff)
+
+    ode_params = ltd.extraparams == 1 ? θ[((length(θ) - ltd.extraparams) + 1)] :
+                 θ[((length(θ) - ltd.extraparams) + 1):length(θ)]
+
+    physsol = if length(ltd.prob.u0) == 1
+        [f(û[i], ode_params, tᵢ) for (i, tᵢ) in enumerate(t)]
     else
-        return 0
+        [f([û[i], u1[i]], ode_params, tᵢ) for (i, tᵢ) in enumerate(t)]
+    end
+    # form of NN output matrix output dim x n
+    deri_physsol = reduce(hcat, physsol)
+    T = promote_type(eltype(deri_physsol), eltype(nnsol))
+
+    physlogprob = T(0)
+    for i in 1:length(ltd.prob.u0)
+        physlogprob += logpdf(
+            MvNormal(deri_physsol[i, :],
+                Diagonal(abs2.(T(ltd.phystd[i]) .* ones(T, length(nnsol[i, :]))))),
+            nnsol[i, :]
+        )
     end
+    return physlogprob
 end
 
 """
 L2 loss loglikelihood(needed for ODE parameter estimation).
 """
-function L2LossData(Tar::LogTargetDensity, θ)
-    # check if dataset is provided
-    if Tar.dataset isa Vector{Nothing} || Tar.extraparams == 0
-        return 0
-    else
-        # matrix(each row corresponds to vector u's rows)
-        nn = Tar(Tar.dataset[end], θ[1:(length(θ) - Tar.extraparams)])
-
-        L2logprob = 0
-        for i in 1:length(Tar.prob.u0)
-            # for u[i] ith vector must be added to dataset,nn[1,:] is the dx in lotka_volterra
-            L2logprob += logpdf(
-                MvNormal(nn[i, :],
-                    LinearAlgebra.Diagonal(abs2.(Tar.l2std[i] .*
-                                                 ones(length(Tar.dataset[i]))))),
-                Tar.dataset[i])
-        end
-        return L2logprob
+@views function L2LossData(ltd::LogTargetDensity, θ)
+    (ltd.dataset isa Vector{Nothing} || ltd.extraparams == 0) && return 0
+
+    # matrix(each row corresponds to vector u's rows)
+    nn = ltd(ltd.dataset[end], θ[1:(length(θ) - ltd.extraparams)])
+    T = eltype(nn)
+
+    L2logprob = zero(T)
+    for i in 1:length(ltd.prob.u0)
+        # for u[i] ith vector must be added to dataset,nn[1, :] is the dx in lotka_volterra
+        L2logprob += logpdf(
+            MvNormal(
+                nn[i, :],
+                Diagonal(abs2.(T(ltd.l2std[i]) .* ones(T, length(ltd.dataset[i]))))
+            ),
+            ltd.dataset[i]
+        )
     end
+    return L2logprob
 end
 
 """
 Physics loglikelihood over problem timespan + dataset timepoints.
 """
-function physloglikelihood(Tar::LogTargetDensity, θ)
-    f = Tar.prob.f
-    p = Tar.prob.p
-    tspan = Tar.prob.tspan
-    autodiff = Tar.autodiff
-    strategy = Tar.strategy
+function physloglikelihood(ltd::LogTargetDensity, θ)
+    (; f, p, tspan) = ltd.prob
+    (; autodiff, strategy) = ltd
 
     # parameter estimation chosen or not
-    if Tar.extraparams > 0
-        ode_params = Tar.extraparams == 1 ?
-                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
-                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
+    if ltd.extraparams > 0
+        ode_params = ltd.extraparams == 1 ? θ[((length(θ) - ltd.extraparams) + 1)] :
+                     θ[((length(θ) - ltd.extraparams) + 1):length(θ)]
     else
-        ode_params = p == SciMLBase.NullParameters() ? [] : p
+        ode_params = p isa SciMLBase.NullParameters ? Float64[] : p
     end
 
-    return getlogpdf(strategy, Tar, f, autodiff, tspan, ode_params, θ)
+    return getlogpdf(strategy, ltd, f, autodiff, tspan, ode_params, θ)
 end
 
-function getlogpdf(strategy::GridTraining, Tar::LogTargetDensity, f, autodiff::Bool,
-        tspan,
-        ode_params, θ)
-    if Tar.dataset isa Vector{Nothing}
-        t = collect(eltype(strategy.dx), tspan[1]:(strategy.dx):tspan[2])
-    else
-        t = vcat(collect(eltype(strategy.dx), tspan[1]:(strategy.dx):tspan[2]),
-            Tar.dataset[end])
-    end
-
-    sum(innerdiff(Tar, f, autodiff, t, θ,
-        ode_params))
+function getlogpdf(strategy::GridTraining, ltd::LogTargetDensity, f, autodiff::Bool,
+        tspan, ode_params, θ)
+    ts = collect(eltype(strategy.dx), tspan[1]:(strategy.dx):tspan[2])
+    t = ltd.dataset isa Vector{Nothing} ? ts : vcat(ts, ltd.dataset[end])
+    return sum(innerdiff(ltd, f, autodiff, t, θ, ode_params))
 end
 
-function getlogpdf(strategy::StochasticTraining,
-        Tar::LogTargetDensity,
-        f,
-        autodiff::Bool,
-        tspan,
-        ode_params,
-        θ)
-    if Tar.dataset isa Vector{Nothing}
-        t = [(tspan[2] - tspan[1]) * rand() + tspan[1] for i in 1:(strategy.points)]
-    else
-        t = vcat([(tspan[2] - tspan[1]) * rand() + tspan[1] for i in 1:(strategy.points)],
-            Tar.dataset[end])
-    end
-
-    sum(innerdiff(Tar, f, autodiff, t, θ,
-        ode_params))
+function getlogpdf(strategy::StochasticTraining, ltd::LogTargetDensity,
+        f, autodiff::Bool, tspan, ode_params, θ)
+    T = promote_type(eltype(tspan[1]), eltype(tspan[2]))
+    samples = (tspan[2] - tspan[1]) .* rand(T, strategy.points) .+ tspan[1]
+    t = ltd.dataset isa Vector{Nothing} ? samples : vcat(samples, ltd.dataset[end])
+    return sum(innerdiff(ltd, f, autodiff, t, θ, ode_params))
 end
 
-function getlogpdf(strategy::QuadratureTraining, Tar::LogTargetDensity, f,
-        autodiff::Bool,
-        tspan,
-        ode_params, θ)
-    function integrand(t::Number, θ)
-        innerdiff(Tar, f, autodiff, [t], θ, ode_params)
-    end
+function getlogpdf(strategy::QuadratureTraining, ltd::LogTargetDensity, f, autodiff::Bool,
+        tspan, ode_params, θ)
+    integrand(t::Number, θ) = innerdiff(ltd, f, autodiff, [t], θ, ode_params)
     intprob = IntegralProblem(
-        integrand, (tspan[1], tspan[2]), θ; nout = length(Tar.prob.u0))
-    sol = solve(intprob, QuadGKJL(); abstol = strategy.abstol, reltol = strategy.reltol)
-    sum(sol.u)
+        integrand, (tspan[1], tspan[2]), θ; nout = length(ltd.prob.u0))
+    sol = solve(intprob, QuadGKJL(); strategy.abstol, strategy.reltol)
+    return sum(sol.u)
 end
 
-function getlogpdf(strategy::WeightedIntervalTraining, Tar::LogTargetDensity, f,
-        autodiff::Bool,
-        tspan,
-        ode_params, θ)
-    minT = tspan[1]
-    maxT = tspan[2]
-
+function getlogpdf(strategy::WeightedIntervalTraining, ltd::LogTargetDensity, f,
+        autodiff::Bool, tspan, ode_params, θ)
+    minT, maxT = tspan
     weights = strategy.weights ./ sum(strategy.weights)
-
     N = length(weights)
-    points = strategy.points
-
     difference = (maxT - minT) / N
 
-    data = Float64[]
+    ts = eltype(difference)[]
     for (index, item) in enumerate(weights)
-        temp_data = rand(1, trunc(Int, points * item)) .* difference .+ minT .+
+        temp_data = rand(1, trunc(Int, strategy.points * item)) .* difference .+ minT .+
                     ((index - 1) * difference)
-        data = append!(data, temp_data)
+        append!(ts, temp_data)
     end
 
-    if Tar.dataset isa Vector{Nothing}
-        t = data
-    else
-        t = vcat(data,
-            Tar.dataset[end])
-    end
-
-    sum(innerdiff(Tar, f, autodiff, t, θ,
-        ode_params))
+    t = ltd.dataset isa Vector{Nothing} ? ts : vcat(ts, ltd.dataset[end])
+    return sum(innerdiff(ltd, f, autodiff, t, θ, ode_params))
 end
 
 """
 MvNormal likelihood at each `ti` in time `t` for ODE collocation residue with NN with parameters θ.
 """
-function innerdiff(Tar::LogTargetDensity, f, autodiff::Bool, t::AbstractVector, θ,
+@views function innerdiff(ltd::LogTargetDensity, f, autodiff::Bool, t::AbstractVector, θ,
         ode_params)
+    # ltd used for phi and LogTargetDensity object attributes access
+    out = ltd(t, θ[1:(length(θ) - ltd.extraparams)])
 
-    # Tar used for phi and LogTargetDensity object attributes access
-    out = Tar(t, θ[1:(length(θ) - Tar.extraparams)])
-
-    # # reject samples case(write clear reason why)
-    if any(isinf, out[:, 1]) || any(isinf, ode_params)
-        return -Inf
-    end
+    # reject samples case(write clear reason why)
+    (any(isinf, out[:, 1]) || any(isinf, ode_params)) && return convert(eltype(out), -Inf)
 
     # this is a vector{vector{dx,dy}}(handle case single u(float passed))
     if length(out[:, 1]) == 1
-        physsol = [f(out[:, i][1],
-                       ode_params,
-                       t[i])
-                   for i in 1:length(out[1, :])]
+        physsol = [f(out[:, i][1], ode_params, t[i]) for i in 1:length(out[1, :])]
     else
-        physsol = [f(out[:, i],
-                       ode_params,
-                       t[i])
-                   for i in 1:length(out[1, :])]
+        physsol = [f(out[:, i], ode_params, t[i]) for i in 1:length(out[1, :])]
     end
     physsol = reduce(hcat, physsol)
 
-    nnsol = NNodederi(Tar, t, θ[1:(length(θ) - Tar.extraparams)], autodiff)
+    nnsol = ode_dfdx(ltd, t, θ[1:(length(θ) - ltd.extraparams)], autodiff)
 
     vals = nnsol .- physsol
+    T = eltype(vals)
 
-    # N dimensional vector if N outputs for NN(each row has logpdf of u[i] where u is vector of dependant variables)
+    # N dimensional vector if N outputs for NN(each row has logpdf of u[i] where u is vector
+    # of dependant variables)
     return [logpdf(
                 MvNormal(vals[i, :],
-                    LinearAlgebra.Diagonal(abs2.(Tar.phystd[i] .*
-                                                 ones(length(vals[i, :]))))),
-                zeros(length(vals[i, :]))) for i in 1:length(Tar.prob.u0)]
+                    Diagonal(abs2.(T(ltd.phystd[i]) .* ones(T, length(vals[i, :]))))),
+                zeros(T, length(vals[i, :]))
+            ) for i in 1:length(ltd.prob.u0)]
 end
 
 """
 Prior logpdf for NN parameters + ODE constants.
 """
-function priorweights(Tar::LogTargetDensity, θ)
-    allparams = Tar.priors
-    # nn weights
-    nnwparams = allparams[1]
-
-    if Tar.extraparams > 0
-        # Vector of ode parameters priors
-        invpriors = allparams[2:end]
-
-        invlogpdf = sum(
-            logpdf(invpriors[length(θ) - i + 1], θ[i])
-            for i in (length(θ) - Tar.extraparams + 1):length(θ);
-            init = 0.0)
-
-        return (invlogpdf
-                +
-                logpdf(nnwparams, θ[1:(length(θ) - Tar.extraparams)]))
-    else
-        return logpdf(nnwparams, θ)
-    end
-end
+@views function priorweights(ltd::LogTargetDensity, θ)
+    allparams = ltd.priors
+    nnwparams = allparams[1] # nn weights
 
-function generate_Tar(chain::Lux.AbstractExplicitLayer, init_params)
-    θ, st = Lux.setup(Random.default_rng(), chain)
-    return init_params, chain, st
-end
+    ltd.extraparams ≤ 0 && return logpdf(nnwparams, θ)
 
-function generate_Tar(chain::Lux.AbstractExplicitLayer, init_params::Nothing)
-    θ, st = Lux.setup(Random.default_rng(), chain)
-    return θ, chain, st
-end
+    # Vector of ode parameters priors
+    invpriors = allparams[2:end]
 
-"""
-NN OUTPUT AT t,θ ~ phi(t,θ).
-"""
-function (f::LogTargetDensity{C, S})(t::AbstractVector,
-        θ) where {C <: Lux.AbstractExplicitLayer, S}
-    θ = vector_to_parameters(θ, f.init_params)
-    y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), t'), θ, f.st)
-    ChainRulesCore.@ignore_derivatives f.st = st
-    f.prob.u0 .+ (t' .- f.prob.tspan[1]) .* y
+    invlogpdf = sum(
+        logpdf(invpriors[length(θ) - i + 1], θ[i])
+    for i in (length(θ) - ltd.extraparams + 1):length(θ))
+
+    return invlogpdf + logpdf(nnwparams, θ[1:(length(θ) - ltd.extraparams)])
 end
 
-function (f::LogTargetDensity{C, S})(t::Number,
-        θ) where {C <: Lux.AbstractExplicitLayer, S}
-    θ = vector_to_parameters(θ, f.init_params)
-    y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), [t]), θ, f.st)
-    ChainRulesCore.@ignore_derivatives f.st = st
-    f.prob.u0 .+ (t .- f.prob.tspan[1]) .* y
+function generate_ltd(chain::AbstractLuxLayer, init_params)
+    return init_params, chain, LuxCore.initialstates(Random.default_rng(), chain)
 end
 
-"""
-Similar to ode_dfdx() in NNODE.
-"""
-function NNodederi(phi::LogTargetDensity, t::AbstractVector, θ, autodiff::Bool)
-    if autodiff
-        hcat(ForwardDiff.derivative.(ti -> phi(ti, θ), t)...)
-    else
-        (phi(t .+ sqrt(eps(eltype(t))), θ) - phi(t, θ)) ./ sqrt(eps(eltype(t)))
-    end
+function generate_ltd(chain::AbstractLuxLayer, ::Nothing)
+    θ, st = LuxCore.setup(Random.default_rng(), chain)
+    return θ, chain, st
 end
 
 function kernelchoice(Kernel, MCMCkwargs)
     if Kernel == HMCDA
-        δ, λ = MCMCkwargs[:δ], MCMCkwargs[:λ]
-        Kernel(δ, λ)
+        Kernel(MCMCkwargs[:δ], MCMCkwargs[:λ])
     elseif Kernel == NUTS
         δ, max_depth, Δ_max = MCMCkwargs[:δ], MCMCkwargs[:max_depth], MCMCkwargs[:Δ_max]
-        Kernel(δ, max_depth = max_depth, Δ_max = Δ_max)
-    else
-        # HMC
-        n_leapfrog = MCMCkwargs[:n_leapfrog]
-        Kernel(n_leapfrog)
+        Kernel(δ; max_depth, Δ_max)
+    else # HMC
+        Kernel(MCMCkwargs[:n_leapfrog])
     end
 end
 
 """
-    ahmc_bayesian_pinn_ode(prob, chain; strategy = GridTraining,
-                        dataset = [nothing],init_params = nothing,
-                        draw_samples = 1000, physdt = 1 / 20.0f0,l2std = [0.05],
-                        phystd = [0.05], priorsNNw = (0.0, 2.0),
-                        param = [], nchains = 1, autodiff = false, Kernel = HMC,
-                        Adaptorkwargs = (Adaptor = StanHMCAdaptor,
-                                         Metric = DiagEuclideanMetric,
-                                         targetacceptancerate = 0.8),
-                        Integratorkwargs = (Integrator = Leapfrog,),
-                        MCMCkwargs = (n_leapfrog = 30,),
-                        progress = false, verbose = false)
+    ahmc_bayesian_pinn_ode(prob, chain; strategy = GridTraining, dataset = [nothing],
+                           init_params = nothing, draw_samples = 1000, physdt = 1 / 20.0f0,
+                           l2std = [0.05], phystd = [0.05], priorsNNw = (0.0, 2.0),
+                           param = [], nchains = 1, autodiff = false, Kernel = HMC,
+                           Adaptorkwargs = (Adaptor = StanHMCAdaptor,
+                               Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
+                           Integratorkwargs = (Integrator = Leapfrog,),
+                           MCMCkwargs = (n_leapfrog = 30,), progress = false,
+                           verbose = false)
 
 !!! warn
 
-    Note that `ahmc_bayesian_pinn_ode()` only supports ODEs which are written in the out-of-place form, i.e.
-    `du = f(u,p,t)`, and not `f(du,u,p,t)`. If not declared out-of-place, then the `ahmc_bayesian_pinn_ode()`
-    will exit with an error.
+    Note that `ahmc_bayesian_pinn_ode()` only supports ODEs which are written in the
+    out-of-place form, i.e. `du = f(u,p,t)`, and not `f(du,u,p,t)`. If not declared
+    out-of-place, then `ahmc_bayesian_pinn_ode()` will exit with an error.
 
 ## Example
 
@@ -460,21 +327,28 @@ Incase you are only solving the Equations for solution, do not provide dataset
 
 ## Keyword Arguments
 
-* `strategy`: The training strategy used to choose the points for the evaluations. By default GridTraining is used with given physdt discretization.
-* `init_params`: initial parameter values for BPINN (ideally for multiple chains different initializations preferred)
+* `strategy`: The training strategy used to choose the points for the evaluations. By
+  default GridTraining is used with given physdt discretization.
+* `init_params`: initial parameter values for BPINN (ideally for multiple chains different
+  initializations preferred)
 * `nchains`: number of chains you want to sample
-* `draw_samples`: number of samples to be drawn in the MCMC algorithms (warmup samples are ~2/3 of draw samples)
+* `draw_samples`: number of samples to be drawn in the MCMC algorithms (warmup samples are
+  ~2/3 of draw samples)
 * `l2std`: standard deviation of BPINN prediction against L2 losses/Dataset
 * `phystd`: standard deviation of BPINN prediction against Chosen Underlying ODE System
-* `priorsNNw`: Tuple of (mean, std) for BPINN Network parameters. Weights and Biases of BPINN are Normal Distributions by default.
+* `priorsNNw`: Tuple of (mean, std) for BPINN Network parameters. Weights and Biases of
+  BPINN are Normal Distributions by default.
 * `param`: Vector of chosen ODE parameters Distributions in case of Inverse problems.
 * `autodiff`: Boolean Value for choice of Derivative Backend(default is numerical)
 * `physdt`: Timestep for approximating ODE in it's Time domain. (1/20.0 by default)
 * `Kernel`: Choice of MCMC Sampling Algorithm (AdvancedHMC.jl implementations HMC/NUTS/HMCDA)
-* `Integratorkwargs`: `Integrator`, `jitter_rate`, `tempering_rate`. Refer: https://turinglang.org/AdvancedHMC.jl/stable/
-* `Adaptorkwargs`: `Adaptor`, `Metric`, `targetacceptancerate`. Refer: https://turinglang.org/AdvancedHMC.jl/stable/
-    Note: Target percentage(in decimal) of iterations in which the proposals are accepted (0.8 by default)
-* `MCMCargs`: A NamedTuple containing all the chosen MCMC kernel's(HMC/NUTS/HMCDA) Arguments, as follows :
+* `Integratorkwargs`: `Integrator`, `jitter_rate`, `tempering_rate`.
+  Refer: https://turinglang.org/AdvancedHMC.jl/stable/
+* `Adaptorkwargs`: `Adaptor`, `Metric`, `targetacceptancerate`.
+  Refer: https://turinglang.org/AdvancedHMC.jl/stable/ Note: Target percentage (in decimal)
+  of iterations in which the proposals are accepted (0.8 by default)
+* `MCMCargs`: A NamedTuple containing all the chosen MCMC kernel's (HMC/NUTS/HMCDA)
+  Arguments, as follows :
     * `n_leapfrog`: number of leapfrog steps for HMC
     * `δ`: target acceptance probability for NUTS and HMCDA
     * `λ`: target trajectory length for HMCDA
@@ -484,67 +358,53 @@ Incase you are only solving the Equations for solution, do not provide dataset
 * `progress`: controls whether to show the progress meter or not.
 * `verbose`: controls the verbosity. (Sample call args in AHMC)
 
-## Warnings
+!!! warning
 
-* AdvancedHMC.jl is still developing convenience structs so might need changes on new releases.
+    AdvancedHMC.jl is still developing convenience structs so might need changes on new
+    releases.
 """
-function ahmc_bayesian_pinn_ode(prob::SciMLBase.ODEProblem, chain;
-        strategy = GridTraining, dataset = [nothing],
-        init_params = nothing, draw_samples = 1000,
-        physdt = 1 / 20.0, l2std = [0.05],
-        phystd = [0.05], priorsNNw = (0.0, 2.0),
-        param = [], nchains = 1, autodiff = false,
+function ahmc_bayesian_pinn_ode(
+        prob::SciMLBase.ODEProblem, chain; strategy = GridTraining, dataset = [nothing],
+        init_params = nothing, draw_samples = 1000, physdt = 1 / 20.0, l2std = [0.05],
+        phystd = [0.05], priorsNNw = (0.0, 2.0), param = [], nchains = 1, autodiff = false,
         Kernel = HMC,
-        Adaptorkwargs = (Adaptor = StanHMCAdaptor,
-            Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
-        Integratorkwargs = (Integrator = Leapfrog,),
-        MCMCkwargs = (n_leapfrog = 30,),
-        progress = false, verbose = false,
-        estim_collocate = false)
-    !(chain isa Lux.AbstractExplicitLayer) &&
-        (chain = adapt(FromFluxAdaptor(false, false), chain))
-    # NN parameter prior mean and variance(PriorsNN must be a tuple)
-    if isinplace(prob)
-        throw(error("The BPINN ODE solver only supports out-of-place ODE definitions, i.e. du=f(u,p,t)."))
-    end
+        Adaptorkwargs = (Adaptor = StanHMCAdaptor, Metric = DiagEuclideanMetric,
+            targetacceptancerate = 0.8),
+        Integratorkwargs = (Integrator = Leapfrog,), MCMCkwargs = (n_leapfrog = 30,),
+        progress = false, verbose = false, estim_collocate = false)
+    @assert !isinplace(prob) "The BPINN ODE solver only supports out-of-place ODE definitions, i.e. du=f(u,p,t)."
+
+    chain isa AbstractLuxLayer || (chain = FromFluxAdaptor()(chain))
 
     strategy = strategy == GridTraining ? strategy(physdt) : strategy
 
     if dataset != [nothing] &&
        (length(dataset) < 2 || !(dataset isa Vector{<:Vector{<:AbstractFloat}}))
-        throw(error("Invalid dataset. dataset would be timeseries (x̂,t) where type: Vector{Vector{AbstractFloat}"))
+        error("Invalid dataset. dataset would be timeseries (x̂,t) where type: Vector{Vector{AbstractFloat}")
     end
 
     if dataset != [nothing] && param == []
         println("Dataset is only needed for Parameter Estimation + Forward Problem, not in only Forward Problem case.")
     elseif dataset == [nothing] && param != []
-        throw(error("Dataset Required for Parameter Estimation."))
+        error("Dataset Required for Parameter Estimation.")
     end
 
-    if chain isa Lux.AbstractExplicitLayer
-        # Lux-Named Tuple
-        initial_nnθ, recon, st = generate_Tar(chain, init_params)
-    else
-        error("Only Lux.AbstractExplicitLayer Neural networks are supported")
-    end
+    initial_nnθ, chain, st = generate_ltd(chain, init_params)
 
-    if nchains > Threads.nthreads()
-        throw(error("number of chains is greater than available threads"))
-    elseif nchains < 1
-        throw(error("number of chains must be greater than 1"))
-    end
+    @assert nchains≤Threads.nthreads() "number of chains is greater than available threads"
+    @assert nchains≥1 "number of chains must be greater than 1"
 
     # eltype(physdt) cause needs Float64 for find_good_stepsize
     # Lux chain(using component array later as vector_to_parameter need namedtuple)
-    initial_θ = collect(eltype(physdt),
-        vcat(ComponentArrays.ComponentArray(initial_nnθ)))
+    T = eltype(physdt)
+    initial_θ = getdata(ComponentArray{T}(initial_nnθ))
 
     # adding ode parameter estimation
     nparameters = length(initial_θ)
     ninv = length(param)
     priors = [
-        MvNormal(priorsNNw[1] * ones(nparameters),
-        LinearAlgebra.Diagonal(abs2.(priorsNNw[2] .* ones(nparameters))))
+        MvNormal(T(priorsNNw[1]) * ones(T, nparameters),
+        Diagonal(abs2.(T(priorsNNw[2]) .* ones(T, nparameters))))
     ]
 
     # append Ode params to all paramvector
@@ -556,29 +416,25 @@ function ahmc_bayesian_pinn_ode(prob::SciMLBase.ODEProblem, chain;
     end
 
     t0 = prob.tspan[1]
+    smodel = StatefulLuxLayer{true}(chain, nothing, st)
     # dimensions would be total no of params,initial_nnθ for Lux namedTuples
-    ℓπ = LogTargetDensity(nparameters, prob, recon, st, strategy, dataset, priors,
+    ℓπ = LogTargetDensity(nparameters, prob, smodel, strategy, dataset, priors,
         phystd, l2std, autodiff, physdt, ninv, initial_nnθ, estim_collocate)
 
-    try
-        ℓπ(t0, initial_θ[1:(nparameters - ninv)])
-    catch err
-        if isa(err, DimensionMismatch)
-            throw(DimensionMismatch("Dimensions of the initial u0 and chain should match"))
-        else
-            throw(err)
+    if verbose
+        @printf("Current Physics Log-likelihood: %g\n", physloglikelihood(ℓπ, initial_θ))
+        @printf("Current Prior Log-likelihood: %g\n", priorweights(ℓπ, initial_θ))
+        @printf("Current MSE against dataset Log-likelihood: %g\n",
+            L2LossData(ℓπ, initial_θ))
+        if estim_collocate
+            @printf("Current gradient loss against dataset Log-likelihood: %g\n",
+                L2loss2(ℓπ, initial_θ))
         end
     end
 
-    @info("Current Physics Log-likelihood : ", physloglikelihood(ℓπ, initial_θ))
-    @info("Current Prior Log-likelihood : ", priorweights(ℓπ, initial_θ))
-    @info("Current MSE against dataset Log-likelihood : ", L2LossData(ℓπ, initial_θ))
-    if estim_collocate
-        @info("Current gradient loss against dataset Log-likelihood : ", L2loss2(ℓπ, initial_θ))
-    end
-
-    Adaptor, Metric, targetacceptancerate = Adaptorkwargs[:Adaptor],
-    Adaptorkwargs[:Metric], Adaptorkwargs[:targetacceptancerate]
+    Adaptor = Adaptorkwargs[:Adaptor]
+    Metric = Adaptorkwargs[:Metric]
+    targetacceptancerate = Adaptorkwargs[:targetacceptancerate]
 
     # Define Hamiltonian system (nparameters ~ dimensionality of the sampling space)
     metric = Metric(nparameters)
@@ -593,8 +449,10 @@ function ahmc_bayesian_pinn_ode(prob::SciMLBase.ODEProblem, chain;
 
         Threads.@threads for i in 1:nchains
             # each chain has different initial NNparameter values(better posterior exploration)
-            initial_θ = vcat(randn(nparameters - ninv),
-                initial_θ[(nparameters - ninv + 1):end])
+            initial_θ = vcat(
+                randn(eltype(initial_θ), nparameters - ninv),
+                initial_θ[(nparameters - ninv + 1):end]
+            )
             initial_ϵ = find_good_stepsize(hamiltonian, initial_θ)
             integrator = integratorchoice(Integratorkwargs, initial_ϵ)
             adaptor = adaptorchoice(Adaptor, MassMatrixAdaptor(metric),
@@ -607,7 +465,7 @@ function ahmc_bayesian_pinn_ode(prob::SciMLBase.ODEProblem, chain;
 
             samplesc[i] = samples
             statsc[i] = stats
-            mcmc_chain = Chains(hcat(samples...)')
+            mcmc_chain = Chains(reduce(hcat, samples)')
             chains[i] = mcmc_chain
         end
 
@@ -623,12 +481,17 @@ function ahmc_bayesian_pinn_ode(prob::SciMLBase.ODEProblem, chain;
         samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples,
             adaptor; progress = progress, verbose = verbose)
 
-        @info("Sampling Complete.")
-        @info("Current Physics Log-likelihood : ", physloglikelihood(ℓπ, samples[end]))
-        @info("Current Prior Log-likelihood : ", priorweights(ℓπ, samples[end]))
-        @info("Current MSE against dataset Log-likelihood : ", L2LossData(ℓπ, samples[end]))
-        if estim_collocate
-            @info("Current gradient loss against dataset Log-likelihood : ", L2loss2(ℓπ, samples[end]))
+        if verbose
+            println("Sampling Complete.")
+            @printf("Current Physics Log-likelihood: %g\n",
+                physloglikelihood(ℓπ, samples[end]))
+            @printf("Current Prior Log-likelihood: %g\n", priorweights(ℓπ, samples[end]))
+            @printf("Current MSE against dataset Log-likelihood: %g\n",
+                L2LossData(ℓπ, samples[end]))
+            if estim_collocate
+                @printf("Current gradient loss against dataset Log-likelihood: %g\n",
+                    L2loss2(ℓπ, samples[end]))
+            end
         end
 
         # return a chain(basic chain),samples and stats
diff --git a/src/dae_solve.jl b/src/dae_solve.jl
index 5a5ee83be3..8cdd4a087f 100644
--- a/src/dae_solve.jl
+++ b/src/dae_solve.jl
@@ -1,85 +1,76 @@
 """
-    NNDAE(chain,
-        OptimizationOptimisers.Adam(0.1),
-        init_params = nothing;
-        autodiff = false,
-        kwargs...)
+    NNDAE(chain, opt, init_params = nothing; autodiff = false, kwargs...)
 
-Algorithm for solving differential algebraic equationsusing a neural network. This is a specialization
-of the physics-informed neural network which is used as a solver for a standard `DAEProblem`.
+Algorithm for solving differential algebraic equationsusing a neural network. This is a
+specialization of the physics-informed neural network which is used as a solver for a
+standard `DAEProblem`.
 
-!!! warn
+!!! warning
 
     Note that NNDAE only supports DAEs which are written in the out-of-place form, i.e.
-    `du = f(du,u,p,t)`, and not `f(out,du,u,p,t)`. If not declared out-of-place, then the NNDAE
-    will exit with an error.
+    `du = f(du,u,p,t)`, and not `f(out,du,u,p,t)`. If not declared out-of-place, then the
+    NNDAE will exit with an error.
 
 ## Positional Arguments
 
-* `chain`: A neural network architecture, defined as either a `Flux.Chain` or a `Lux.AbstractExplicitLayer`.
+* `chain`: A neural network architecture, defined as either a `Flux.Chain` or a
+  `Lux.AbstractLuxLayer`.
 * `opt`: The optimizer to train the neural network.
 * `init_params`: The initial parameter of the neural network. By default, this is `nothing`
   which thus uses the random initialization provided by the neural network library.
 
 ## Keyword Arguments
 
-* `autodiff`: The switch between automatic(not supported yet) and numerical differentiation for
-              the PDE operators. The reverse mode of the loss function is always
+* `autodiff`: The switch between automatic (not supported yet) and numerical differentiation
+              for the PDE operators. The reverse mode of the loss function is always
               automatic differentiation (via Zygote), this is only for the derivative
               in the loss function (the derivative with respect to time).
 * `strategy`: The training strategy used to choose the points for the evaluations.
               By default, `GridTraining` is used with `dt` if given.
 """
-struct NNDAE{C, O, P, K, S <: Union{Nothing, AbstractTrainingStrategy}
-} <: SciMLBase.AbstractDAEAlgorithm
-    chain::C
-    opt::O
-    init_params::P
+@concrete struct NNDAE <: SciMLBase.AbstractDAEAlgorithm
+    chain <: AbstractLuxLayer
+    opt
+    init_params
     autodiff::Bool
-    strategy::S
-    kwargs::K
+    strategy <: Union{Nothing, AbstractTrainingStrategy}
+    kwargs
 end
 
 function NNDAE(chain, opt, init_params = nothing; strategy = nothing, autodiff = false,
         kwargs...)
-    !(chain isa Lux.AbstractExplicitLayer) &&
-        (chain = adapt(FromFluxAdaptor(false, false), chain))
-    NNDAE(chain, opt, init_params, autodiff, strategy, kwargs)
+    chain isa AbstractLuxLayer || (chain = FromFluxAdaptor()(chain))
+    return NNDAE(chain, opt, init_params, autodiff, strategy, kwargs)
 end
 
 function dfdx(phi::ODEPhi, t::AbstractVector, θ, autodiff::Bool,
         differential_vars::AbstractVector)
-    if autodiff
-        autodiff && throw(ArgumentError("autodiff not supported for DAE problem."))
-    else
-        dphi = (phi(t .+ sqrt(eps(eltype(t))), θ) - phi(t, θ)) ./ sqrt(eps(eltype(t)))
-        batch_size = size(t)[1]
-        reduce(vcat,
-            [dv ? dphi[[i], :] : zeros(1, batch_size)
-             for (i, dv) in enumerate(differential_vars)])
-    end
+    autodiff && throw(ArgumentError("autodiff not supported for DAE problem."))
+    ϵ = sqrt(eps(eltype(t)))
+    dϕ = (phi(t .+ ϵ, θ) .- phi(t, θ)) ./ ϵ
+    return reduce(vcat,
+        [dv ? dϕ[i:i, :] : zeros(eltype(dϕ), 1, size(dϕ, 2))
+         for (i, dv) in enumerate(differential_vars)])
 end
 
-function inner_loss(phi::ODEPhi{C, T, U}, f, autodiff::Bool, t::AbstractVector, θ,
-        p, differential_vars::AbstractVector) where {C, T, U}
-    out = Array(phi(t, θ))
-    dphi = Array(dfdx(phi, t, θ, autodiff, differential_vars))
-    arrt = Array(t)
-    loss = reduce(hcat, [f(dphi[:, i], out[:, i], p, arrt[i]) for i in 1:size(out, 2)])
-    sum(abs2, loss) / length(t)
+function inner_loss(phi::ODEPhi, f, autodiff::Bool, t::AbstractVector,
+        θ, p, differential_vars::AbstractVector)
+    out = phi(t, θ)
+    dphi = dfdx(phi, t, θ, autodiff, differential_vars)
+    return mapreduce(+, enumerate(t)) do (i, tᵢ)
+        sum(abs2, f(dphi[:, i], out[:, i], p, tᵢ))
+    end / length(t)
 end
 
-function generate_loss(strategy::GridTraining, phi, f, autodiff::Bool, tspan, p,
+function generate_loss(strategy::GridTraining, phi::ODEPhi, f, autodiff::Bool, tspan, p,
         differential_vars::AbstractVector)
-    ts = tspan[1]:(strategy.dx):tspan[2]
     autodiff && throw(ArgumentError("autodiff not supported for GridTraining."))
-    function loss(θ, _)
-        sum(abs2, inner_loss(phi, f, autodiff, ts, θ, p, differential_vars))
-    end
-    return loss
+    ts = tspan[1]:(strategy.dx):tspan[2]
+    return (θ, _) -> sum(abs2, inner_loss(phi, f, autodiff, ts, θ, p, differential_vars))
 end
 
-function SciMLBase.__solve(prob::SciMLBase.AbstractDAEProblem,
+function SciMLBase.__solve(
+        prob::SciMLBase.AbstractDAEProblem,
         alg::NNDAE,
         args...;
         dt = nothing,
@@ -91,75 +82,43 @@ function SciMLBase.__solve(prob::SciMLBase.AbstractDAEProblem,
         verbose = false,
         saveat = nothing,
         maxiters = nothing,
-        tstops = nothing)
-    u0 = prob.u0
-    du0 = prob.du0
-    tspan = prob.tspan
-    f = prob.f
-    p = prob.p
+        tstops = nothing
+)
+    (; u0, tspan, f, p, differential_vars) = prob
     t0 = tspan[1]
+    (; chain, opt, autodiff, init_params) = alg
 
-    #hidden layer
-    chain = alg.chain
-    opt = alg.opt
-    autodiff = alg.autodiff
-
-    #train points generation
-    init_params = alg.init_params
-
-    # A logical array which declares which variables are the differential (non-algebraic) vars
-    differential_vars = prob.differential_vars
+    phi, init_params = generate_phi_θ(chain, t0, u0, init_params)
+    init_params = ComponentArray(; depvar = init_params)
 
-    if chain isa Lux.AbstractExplicitLayer || chain isa Flux.Chain
-        phi, init_params = generate_phi_θ(chain, t0, u0, init_params)
-        init_params = ComponentArrays.ComponentArray(;
-            depvar = ComponentArrays.ComponentArray(init_params))
-    else
-        error("Only Lux.AbstractExplicitLayer and Flux.Chain neural networks are supported")
-    end
-
-    if isinplace(prob)
-        throw(error("The NNODE solver only supports out-of-place DAE definitions, i.e. du=f(u,p,t)."))
-    end
-
-    try
-        phi(t0, init_params)
-    catch err
-        if isa(err, DimensionMismatch)
-            throw(DimensionMismatch("Dimensions of the initial u0 and chain should match"))
-        else
-            throw(err)
-        end
-    end
+    @assert !isinplace(prob) "The NNODE solver only supports out-of-place DAE definitions, i.e. du=f(u,p,t)."
 
     strategy = if alg.strategy === nothing
-        if dt !== nothing
-            GridTraining(dt)
-        else
-            error("dt is not defined")
-        end
+        dt === nothing && error("`dt` is not defined")
+        GridTraining(dt)
     end
 
     inner_f = generate_loss(strategy, phi, f, autodiff, tspan, p, differential_vars)
 
-    # Creates OptimizationFunction Object from total_loss
     total_loss(θ, _) = inner_f(θ, phi)
+    optf = OptimizationFunction(total_loss, AutoZygote())
 
-    # Optimization Algo for Training Strategies
-    opt_algo = Optimization.AutoZygote()
-    # Creates OptimizationFunction Object from total_loss
-    optf = OptimizationFunction(total_loss, opt_algo)
-
-    iteration = 0
+    plen = maxiters === nothing ? 6 : ndigits(maxiters)
     callback = function (p, l)
-        iteration += 1
-        verbose && println("Current loss is: $l, Iteration: $iteration")
-        l < abstol
+        if verbose
+            if maxiters === nothing
+                @printf("[NNDAE]\tIter: [%*d]\tLoss: %g\n", plen, p.iter, l)
+            else
+                @printf("[NNDAE]\tIter: [%*d/%d]\tLoss: %g\n", plen, p.iter, maxiters, l)
+            end
+        end
+        return l < abstol
     end
+
     optprob = OptimizationProblem(optf, init_params)
     res = solve(optprob, opt; callback, maxiters, alg.kwargs...)
 
-    #solutions at timepoints
+    # solutions at timepoints
     if saveat isa Number
         ts = tspan[1]:saveat:tspan[2]
     elseif saveat isa AbstractArray
@@ -178,14 +137,11 @@ function SciMLBase.__solve(prob::SciMLBase.AbstractDAEProblem,
         u = [phi(t, res.u) for t in ts]
     end
 
-    sol = SciMLBase.build_solution(prob, alg, ts, u;
-        k = res, dense = true,
-        calculate_error = false,
-        retcode = ReturnCode.Success,
-        original = res,
+    sol = SciMLBase.build_solution(prob, alg, ts, u; k = res, dense = true,
+        calculate_error = false, retcode = ReturnCode.Success, original = res,
         resid = res.objective)
     SciMLBase.has_analytic(prob.f) &&
         SciMLBase.calculate_solution_errors!(sol; timeseries_errors = true,
             dense_errors = false)
-    sol
+    return sol
 end
diff --git a/src/dgm.jl b/src/dgm.jl
index 40fe88134e..15b872ef60 100644
--- a/src/dgm.jl
+++ b/src/dgm.jl
@@ -1,22 +1,19 @@
-struct dgm_lstm_layer{F1, F2} <: Lux.AbstractExplicitLayer
-    activation1::Function
-    activation2::Function
+@concrete struct DGMLSTMLayer <: AbstractLuxLayer
+    activation1
+    activation2
     in_dims::Int
     out_dims::Int
-    init_weight::F1
-    init_bias::F2
+    init_weight
+    init_bias
 end
 
-function dgm_lstm_layer(in_dims::Int, out_dims::Int, activation1, activation2;
-        init_weight = Lux.glorot_uniform, init_bias = Lux.zeros32)
-    return dgm_lstm_layer{typeof(init_weight), typeof(init_bias)}(
-        activation1, activation2, in_dims, out_dims, init_weight, init_bias)
+function DGMLSTMLayer(in_dims::Int, out_dims::Int, activation1, activation2;
+        init_weight = glorot_uniform, init_bias = zeros32)
+    return DGMLSTMLayer(activation1, activation2, in_dims, out_dims, init_weight, init_bias)
 end
 
-import Lux: initialparameters, initialstates, parameterlength, statelength
-
-function Lux.initialparameters(rng::AbstractRNG, l::dgm_lstm_layer)
-    return (
+function initialparameters(rng::AbstractRNG, l::DGMLSTMLayer)
+    return (;
         Uz = l.init_weight(rng, l.out_dims, l.in_dims),
         Ug = l.init_weight(rng, l.out_dims, l.in_dims),
         Ur = l.init_weight(rng, l.out_dims, l.in_dims),
@@ -32,75 +29,43 @@ function Lux.initialparameters(rng::AbstractRNG, l::dgm_lstm_layer)
     )
 end
 
-Lux.initialstates(::AbstractRNG, ::dgm_lstm_layer) = NamedTuple()
-function Lux.parameterlength(l::dgm_lstm_layer)
-    4 * (l.out_dims * l.in_dims + l.out_dims * l.out_dims + l.out_dims)
-end
-Lux.statelength(l::dgm_lstm_layer) = 0
-
-function (layer::dgm_lstm_layer)(
-        S::AbstractVecOrMat{T}, x::AbstractVecOrMat{T}, ps, st::NamedTuple) where {T}
-    @unpack Uz, Ug, Ur, Uh, Wz, Wg, Wr, Wh, bz, bg, br, bh = ps
-    Z = layer.activation1.(Uz * x + Wz * S .+ bz)
-    G = layer.activation1.(Ug * x + Wg * S .+ bg)
-    R = layer.activation1.(Ur * x + Wr * S .+ br)
-    H = layer.activation2.(Uh * x + Wh * (S .* R) .+ bh)
-    S_new = (1.0 .- G) .* H .+ Z .* S
-    return S_new, st
-end
-
-struct dgm_lstm_block{L <: NamedTuple} <: Lux.AbstractExplicitContainerLayer{(:layers,)}
-    layers::L
-end
-
-function dgm_lstm_block(l...)
-    names = ntuple(i -> Symbol("dgm_lstm_$i"), length(l))
-    layers = NamedTuple{names}(l)
-    return dgm_lstm_block(layers)
+function parameterlength(l::DGMLSTMLayer)
+    return 4 * (l.out_dims * l.in_dims + l.out_dims * l.out_dims + l.out_dims)
 end
 
-dgm_lstm_block(xs::AbstractVector) = dgm_lstm_block(xs...)
-
-@generated function apply_dgm_lstm_block(layers::NamedTuple{fields}, S::AbstractVecOrMat,
-        x::AbstractVecOrMat, ps, st::NamedTuple) where {fields}
-    N = length(fields)
-    S_symbols = vcat([:S], [gensym() for _ in 1:N])
-    x_symbol = :x
-    st_symbols = [gensym() for _ in 1:N]
-    calls = [:(($(S_symbols[i + 1]), $(st_symbols[i])) = layers.$(fields[i])(
-                 $(S_symbols[i]), $(x_symbol), ps.$(fields[i]), st.$(fields[i])))
-             for i in 1:N]
-    push!(calls, :(st = NamedTuple{$fields}((($(Tuple(st_symbols)...),)))))
-    push!(calls, :(return $(S_symbols[N + 1]), st))
-    return Expr(:block, calls...)
+# TODO: use more optimized versions from LuxLib
+# XXX: Why not use the one from Lux?
+function (layer::DGMLSTMLayer)((S, x), ps, st::NamedTuple)
+    (; Uz, Ug, Ur, Uh, Wz, Wg, Wr, Wh, bz, bg, br, bh) = ps
+    Z = layer.activation1.(Uz * x .+ Wz * S .+ bz)
+    G = layer.activation1.(Ug * x .+ Wg * S .+ bg)
+    R = layer.activation1.(Ur * x .+ Wr * S .+ br)
+    H = layer.activation2.(Uh * x .+ Wh * (S .* R) .+ bh)
+    S_new = (1 .- G) .* H .+ Z .* S
+    return S_new, st
 end
 
-function (L::dgm_lstm_block)(
-        S::AbstractVecOrMat{T}, x::AbstractVecOrMat{T}, ps, st::NamedTuple) where {T}
-    return apply_dgm_lstm_block(L.layers, S, x, ps, st)
+dgm_lstm_block_rearrange(Sᵢ₊₁, (Sᵢ, x)) = Sᵢ₊₁, x
+
+function DGMLSTMBlock(layers...)
+    blocks = AbstractLuxLayer[]
+    for (i, layer) in enumerate(layers)
+        if i == length(layers)
+            push!(blocks, layer)
+        else
+            push!(blocks, SkipConnection(layer, dgm_lstm_block_rearrange))
+        end
+    end
+    return Chain(blocks...)
 end
 
-struct dgm{S, L, E} <: Lux.AbstractExplicitContainerLayer{(:d_start, :lstm, :d_end)}
-    d_start::S
-    lstm::L
-    d_end::E
-end
-
-function (l::dgm)(x::AbstractVecOrMat{T}, ps, st::NamedTuple) where {T}
-    S, st_start = l.d_start(x, ps.d_start, st.d_start)
-    S, st_lstm = l.lstm(S, x, ps.lstm, st.lstm)
-    y, st_end = l.d_end(S, ps.d_end, st.d_end)
-
-    st_new = (
-        d_start = st_start,
-        lstm = st_lstm,
-        d_end = st_end
-    )
-    return y, st_new
+@concrete struct DGM <: AbstractLuxWrapperLayer{:model}
+    model
 end
 
 """
-    dgm(in_dims::Int, out_dims::Int, modes::Int, L::Int, activation1, activation2, out_activation= Lux.identity)
+    DGM(in_dims::Int, out_dims::Int, modes::Int, L::Int, activation1, activation2,
+        out_activation=identity)
 
 returns the architecture defined for Deep Galerkin method.
 
@@ -127,21 +92,20 @@ f(t, x, \\theta) &= \\sigma_{out}(W S^{L+1} + b).
 - `out_activation`: activation fn used for the output of the network.
 - `kwargs`: additional arguments to be splatted into [`PhysicsInformedNN`](@ref).
 """
-function dgm(in_dims::Int, out_dims::Int, modes::Int, layers::Int,
+function DGM(in_dims::Int, out_dims::Int, modes::Int, layers::Int,
         activation1, activation2, out_activation)
-    dgm(
-        Lux.Dense(in_dims, modes, activation1),
-        dgm_lstm_block([dgm_lstm_layer(in_dims, modes, activation1, activation2)
-                        for i in 1:layers]),
-        Lux.Dense(modes, out_dims, out_activation)
-    )
+    return DGM(Chain(
+        SkipConnection(
+            Dense(in_dims => modes, activation1),
+            DGMLSTMBlock([DGMLSTMLayer(in_dims, modes, activation1, activation2)
+                          for _ in 1:layers]...)),
+        Dense(modes => out_dims, out_activation)))
 end
 
 """
-    DeepGalerkin(in_dims::Int, out_dims::Int, modes::Int, L::Int, activation1::Function, activation2::Function, out_activation::Function, 
-        strategy::NeuralPDE.AbstractTrainingStrategy; kwargs...)
-
-returns a `discretize` algorithm for the ModelingToolkit PDESystem interface, which transforms a `PDESystem` into an `OptimizationProblem` using the Deep Galerkin method.
+    DeepGalerkin(in_dims::Int, out_dims::Int, modes::Int, L::Int, activation1::Function,
+        activation2::Function, out_activation::Function, strategy::AbstractTrainingStrategy;
+        kwargs...)
 
 ## Arguments:
 
@@ -166,10 +130,10 @@ Journal of Computational Physics, Volume 375, 2018, Pages 1339-1364, doi: https:
 """
 function DeepGalerkin(
         in_dims::Int, out_dims::Int, modes::Int, L::Int, activation1::Function,
-        activation2::Function, out_activation::Function,
-        strategy::NeuralPDE.AbstractTrainingStrategy; kwargs...)
-    PhysicsInformedNN(
-        dgm(in_dims, out_dims, modes, L, activation1, activation2, out_activation),
+        activation2::Function, out_activation::Function, strategy::AbstractTrainingStrategy;
+        kwargs...)
+    return PhysicsInformedNN(
+        DGM(in_dims, out_dims, modes, L, activation1, activation2, out_activation),
         strategy; kwargs...
     )
 end
diff --git a/src/discretize.jl b/src/discretize.jl
index 9a40e0fe82..bed027aa2f 100644
--- a/src/discretize.jl
+++ b/src/discretize.jl
@@ -23,23 +23,14 @@ to
           end
       end)
 
-for Lux.AbstractExplicitLayer.
+for Lux.AbstractLuxLayer.
 """
 function build_symbolic_loss_function(pinnrep::PINNRepresentation, eqs;
-        eq_params = SciMLBase.NullParameters(),
-        param_estim = false,
-        default_p = nothing,
-        bc_indvars = pinnrep.indvars,
-        integrand = nothing,
-        dict_transformation_vars = nothing,
-        transformation_vars = nothing,
+        eq_params = SciMLBase.NullParameters(), param_estim = false, default_p = nothing,
+        bc_indvars = pinnrep.indvars, integrand = nothing,
+        dict_transformation_vars = nothing, transformation_vars = nothing,
         integrating_depvars = pinnrep.depvars)
-    @unpack indvars, depvars, dict_indvars, dict_depvars, dict_depvar_input,
-    phi, derivative, integral,
-    multioutput, init_params, strategy, eq_params,
-    param_estim, default_p = pinnrep
-
-    eltypeθ = eltype(pinnrep.flat_init_params)
+    (; depvars, dict_depvars, dict_depvar_input, phi, derivative, integral, multioutput, init_params, strategy, eq_params, param_estim, default_p) = pinnrep
 
     if integrand isa Nothing
         loss_function = parse_equation(pinnrep, eqs)
@@ -68,9 +59,6 @@ function build_symbolic_loss_function(pinnrep::PINNRepresentation, eqs;
         expr_θ = Expr[]
         expr_phi = Expr[]
 
-        acum = [0; accumulate(+, map(length, init_params))]
-        sep = [(acum[i] + 1):acum[i + 1] for i in 1:(length(acum) - 1)]
-
         for i in eachindex(depvars)
             push!(expr_θ, :($θ.depvar.$(depvars[i])))
             push!(expr_phi, :(phi[$i]))
@@ -138,34 +126,28 @@ function build_symbolic_loss_function(pinnrep::PINNRepresentation, eqs;
     end
     let_ex = Expr(:let, vars_eq, vcat_expr_loss_functions)
     push!(ex.args, let_ex)
-    expr_loss_function = :(($vars) -> begin
+    return :(($vars) -> begin
         $ex
     end)
 end
 
 """
-    build_loss_function(eqs, indvars, depvars, phi, derivative, init_params; bc_indvars=nothing)
+    build_loss_function(eqs, indvars, depvars, phi, derivative, init_params;
+        bc_indvars=nothing)
 
 Returns the body of loss function, which is the executable Julia function, for the main
 equation or boundary condition.
 """
 function build_loss_function(pinnrep::PINNRepresentation, eqs, bc_indvars)
-    @unpack eq_params, param_estim, default_p, phi, derivative, integral = pinnrep
+    (; eq_params, param_estim, default_p, phi, derivative, integral) = pinnrep
 
     bc_indvars = bc_indvars === nothing ? pinnrep.indvars : bc_indvars
 
-    expr_loss_function = build_symbolic_loss_function(pinnrep, eqs;
-        bc_indvars = bc_indvars,
-        eq_params = eq_params,
-        param_estim = param_estim,
-        default_p = default_p)
+    expr_loss_function = build_symbolic_loss_function(pinnrep, eqs; bc_indvars, eq_params,
+        param_estim, default_p)
     u = get_u()
     _loss_function = @RuntimeGeneratedFunction(expr_loss_function)
-    loss_function = (cord, θ) -> begin
-        _loss_function(cord, θ, phi, derivative, integral, u,
-            default_p)
-    end
-    return loss_function
+    return (cord, θ) -> _loss_function(cord, θ, phi, derivative, integral, u, default_p)
 end
 
 """
@@ -178,8 +160,7 @@ function generate_training_sets end
 
 function generate_training_sets(domains, dx, eqs, bcs, eltypeθ, _indvars::Array,
         _depvars::Array)
-    depvars, indvars, dict_indvars, dict_depvars, dict_depvar_input = get_vars(_indvars,
-        _depvars)
+    _, _, dict_indvars, dict_depvars, _ = get_vars(_indvars, _depvars)
     return generate_training_sets(domains, dx, eqs, bcs, eltypeθ, dict_indvars,
         dict_depvars)
 end
@@ -187,11 +168,7 @@ end
 # Generate training set in the domain and on the boundary
 function generate_training_sets(domains, dx, eqs, bcs, eltypeθ, dict_indvars::Dict,
         dict_depvars::Dict)
-    if dx isa Array
-        dxs = dx
-    else
-        dxs = fill(dx, length(domains))
-    end
+    dxs = dx isa Array ? dx : fill(dx, length(domains))
 
     spans = [infimum(d.domain):dx:supremum(d.domain) for (d, dx) in zip(domains, dxs)]
     dict_var_span = Dict([Symbol(d.variables) => infimum(d.domain):dx:supremum(d.domain)
@@ -201,12 +178,8 @@ function generate_training_sets(domains, dx, eqs, bcs, eltypeθ, dict_indvars::D
     bound_vars = get_variables(bcs, dict_indvars, dict_depvars)
 
     dif = [eltypeθ[] for i in 1:size(domains)[1]]
-    for _args in bound_vars
-        for (i, x) in enumerate(_args)
-            if x isa Number
-                push!(dif[i], x)
-            end
-        end
+    for _args in bound_vars, (i, x) in enumerate(_args)
+        x isa Number && push!(dif[i], x)
     end
     cord_train_set = collect.(spans)
     bc_data = map(zip(dif, cord_train_set)) do (d, c)
@@ -216,24 +189,20 @@ function generate_training_sets(domains, dx, eqs, bcs, eltypeθ, dict_indvars::D
     dict_var_span_ = Dict([Symbol(d.variables) => bc for (d, bc) in zip(domains, bc_data)])
 
     bcs_train_sets = map(bound_args) do bt
-        span = map(b -> get(dict_var_span, b, b), bt)
-        _set = adapt(eltypeθ,
-            hcat(vec(map(points -> collect(points), Iterators.product(span...)))...))
+        span = get.((dict_var_span,), bt, bt)
+        return reduce(hcat, vec(map(collect, Iterators.product(span...)))) |>
+               EltypeAdaptor{eltypeθ}()
     end
 
-    pde_vars = get_variables(eqs, dict_indvars, dict_depvars)
     pde_args = get_argument(eqs, dict_indvars, dict_depvars)
 
-    pde_train_set = adapt(eltypeθ,
-        hcat(vec(map(points -> collect(points),
-            Iterators.product(bc_data...)))...))
-
     pde_train_sets = map(pde_args) do bt
-        span = map(b -> get(dict_var_span_, b, b), bt)
-        _set = adapt(eltypeθ,
-            hcat(vec(map(points -> collect(points), Iterators.product(span...)))...))
+        span = get.((dict_var_span_,), bt, bt)
+        return reduce(hcat, vec(map(collect, Iterators.product(span...)))) |>
+               EltypeAdaptor{eltypeθ}()
     end
-    [pde_train_sets, bcs_train_sets]
+
+    return [pde_train_sets, bcs_train_sets]
 end
 
 """
@@ -245,32 +214,33 @@ training strategy: StochasticTraining, QuasiRandomTraining, QuadratureTraining.
 function get_bounds end
 
 function get_bounds(domains, eqs, bcs, eltypeθ, _indvars::Array, _depvars::Array, strategy)
-    depvars, indvars, dict_indvars, dict_depvars, dict_depvar_input = get_vars(_indvars,
-        _depvars)
+    _, _, dict_indvars, dict_depvars, _ = get_vars(_indvars, _depvars)
     return get_bounds(domains, eqs, bcs, eltypeθ, dict_indvars, dict_depvars, strategy)
 end
 
 function get_bounds(domains, eqs, bcs, eltypeθ, _indvars::Array, _depvars::Array,
         strategy::QuadratureTraining)
-    depvars, indvars, dict_indvars, dict_depvars, dict_depvar_input = get_vars(_indvars,
-        _depvars)
+    _, _, dict_indvars, dict_depvars, _ = get_vars(_indvars, _depvars)
     return get_bounds(domains, eqs, bcs, eltypeθ, dict_indvars, dict_depvars, strategy)
 end
 
 function get_bounds(domains, eqs, bcs, eltypeθ, dict_indvars, dict_depvars,
-        strategy::QuadratureTraining)
+        ::QuadratureTraining)
     dict_lower_bound = Dict([Symbol(d.variables) => infimum(d.domain) for d in domains])
     dict_upper_bound = Dict([Symbol(d.variables) => supremum(d.domain) for d in domains])
 
     pde_args = get_argument(eqs, dict_indvars, dict_depvars)
 
+    ϵ = cbrt(eps(eltypeθ))
+    eltype_adaptor = EltypeAdaptor{eltypeθ}()
+
     pde_lower_bounds = map(pde_args) do pd
-        span = map(p -> get(dict_lower_bound, p, p), pd)
-        map(s -> adapt(eltypeθ, s) + cbrt(eps(eltypeθ)), span)
+        span = get.((dict_lower_bound,), pd, pd) |> eltype_adaptor
+        return span .+ ϵ
     end
     pde_upper_bounds = map(pde_args) do pd
-        span = map(p -> get(dict_upper_bound, p, p), pd)
-        map(s -> adapt(eltypeθ, s) - cbrt(eps(eltypeθ)), span)
+        span = get.((dict_upper_bound,), pd, pd) |> eltype_adaptor
+        return span .+ ϵ
     end
     pde_bounds = [pde_lower_bounds, pde_upper_bounds]
 
@@ -284,42 +254,39 @@ function get_bounds(domains, eqs, bcs, eltypeθ, dict_indvars, dict_depvars,
     end
     bcs_bounds = [bcs_lower_bounds, bcs_upper_bounds]
 
-    [pde_bounds, bcs_bounds]
+    return [pde_bounds, bcs_bounds]
 end
 
 function get_bounds(domains, eqs, bcs, eltypeθ, dict_indvars, dict_depvars, strategy)
     dx = 1 / strategy.points
     dict_span = Dict([Symbol(d.variables) => [
-                          infimum(d.domain) + dx,
-                          supremum(d.domain) - dx
-                      ] for d in domains])
+                          infimum(d.domain) + dx, supremum(d.domain) - dx] for d in domains])
 
-    # pde_bounds = [[infimum(d.domain),supremum(d.domain)] for d in domains]
     pde_args = get_argument(eqs, dict_indvars, dict_depvars)
     pde_bounds = map(pde_args) do pde_arg
         bds = mapreduce(s -> get(dict_span, s, fill(s, 2)), hcat, pde_arg)
         bds = eltypeθ.(bds)
-        bds[1, :], bds[2, :]
+        return bds[1, :], bds[2, :]
     end
 
     bound_args = get_argument(bcs, dict_indvars, dict_depvars)
     bcs_bounds = map(bound_args) do bound_arg
         bds = mapreduce(s -> get(dict_span, s, fill(s, 2)), hcat, bound_arg)
         bds = eltypeθ.(bds)
-        bds[1, :], bds[2, :]
+        return bds[1, :], bds[2, :]
     end
+
     return pde_bounds, bcs_bounds
 end
 
 function get_numeric_integral(pinnrep::PINNRepresentation)
-    @unpack strategy, indvars, depvars, multioutput, derivative,
-    depvars, indvars, dict_indvars, dict_depvars = pinnrep
+    (; strategy, indvars, depvars, derivative, depvars, indvars, dict_indvars, dict_depvars) = pinnrep
 
-    integral = (u, cord, phi, integrating_var_id, integrand_func, lb, ub, θ; strategy = strategy, indvars = indvars, depvars = depvars, dict_indvars = dict_indvars, dict_depvars = dict_depvars) -> begin
+    return (u, cord, phi, integrating_var_id, integrand_func, lb, ub, θ; strategy = strategy, indvars = indvars, depvars = depvars, dict_indvars = dict_indvars, dict_depvars = dict_depvars) -> begin
         function integration_(cord, lb, ub, θ)
             cord_ = cord
             function integrand_(x, p)
-                ChainRulesCore.@ignore_derivatives @views(cord_[integrating_var_id]) .= x
+                @ignore_derivatives cord_[integrating_var_id] .= x
                 return integrand_func(cord_, p, phi, derivative, nothing, u, nothing)
             end
             prob_ = IntegralProblem(integrand_, (lb, ub), θ)
@@ -332,24 +299,22 @@ function get_numeric_integral(pinnrep::PINNRepresentation)
         ub_ = zeros(size(ub)[1], size(cord)[2])
         for (i, l) in enumerate(lb)
             if l isa Number
-                ChainRulesCore.@ignore_derivatives lb_[i, :] = fill(l, 1, size(cord)[2])
+                @ignore_derivatives lb_[i, :] .= l
             else
-                ChainRulesCore.@ignore_derivatives lb_[i, :] = l(cord, θ, phi, derivative,
-                    nothing, u, nothing)
+                @ignore_derivatives lb_[i, :] = l(
+                    cord, θ, phi, derivative, nothing, u, nothing)
             end
         end
         for (i, u_) in enumerate(ub)
             if u_ isa Number
-                ChainRulesCore.@ignore_derivatives ub_[i, :] = fill(u_, 1, size(cord)[2])
+                @ignore_derivatives ub_[i, :] .= u_
             else
-                ChainRulesCore.@ignore_derivatives ub_[i, :] = u_(cord, θ, phi, derivative,
+                @ignore_derivatives ub_[i, :] = u_(cord, θ, phi, derivative,
                     nothing, u, nothing)
             end
         end
         integration_arr = Matrix{Float64}(undef, 1, 0)
-        for i in 1:size(cord)[2]
-            # ub__ = @Zygote.ignore getindex(ub_, :,  i)
-            # lb__ = @Zygote.ignore getindex(lb_, :,  i)
+        for i in 1:size(cord, 2)
             integration_arr = hcat(integration_arr,
                 integration_(cord[:, i], lb_[:, i], ub_[:, i], θ))
         end
@@ -364,33 +329,25 @@ end
 It transforms a symbolic description of a ModelingToolkit-defined `PDESystem` into a
 `PINNRepresentation` which holds the pieces required to build an `OptimizationProblem`
 for [Optimization.jl](https://docs.sciml.ai/Optimization/stable) or a Likelihood Function
-used for HMC based Posterior Sampling Algorithms [AdvancedHMC.jl](https://turinglang.org/AdvancedHMC.jl/stable/)
-which is later optimized upon to give Solution or the Solution Distribution of the PDE.
+used for HMC based Posterior Sampling Algorithms
+[AdvancedHMC.jl](https://turinglang.org/AdvancedHMC.jl/stable/) which is later optimized
+upon to give Solution or the Solution Distribution of the PDE.
 
 For more information, see `discretize` and `PINNRepresentation`.
 """
-function SciMLBase.symbolic_discretize(pde_system::PDESystem,
-        discretization::AbstractPINN)
-    eqs = pde_system.eqs
-    bcs = pde_system.bcs
-    chain = discretization.chain
-
-    domains = pde_system.domain
+function SciMLBase.symbolic_discretize(pde_system::PDESystem, discretization::AbstractPINN)
+    (; eqs, bcs, domain) = pde_system
     eq_params = pde_system.ps
     defaults = pde_system.defaults
-    default_p = eq_params == SciMLBase.NullParameters() ? nothing :
-                [defaults[ep] for ep in eq_params]
-
-    param_estim = discretization.param_estim
-    additional_loss = discretization.additional_loss
+    (; chain, param_estim, additional_loss, multioutput, init_params, phi, derivative, strategy, logger, iteration, self_increment) = discretization
+    (; log_frequency) = discretization.log_options
     adaloss = discretization.adaptive_loss
 
-    depvars, indvars, dict_indvars, dict_depvars, dict_depvar_input = get_vars(
-        pde_system.indvars,
-        pde_system.depvars)
+    default_p = eq_params isa SciMLBase.NullParameters ? nothing :
+                [defaults[ep] for ep in eq_params]
 
-    multioutput = discretization.multioutput
-    init_params = discretization.init_params
+    depvars, indvars, dict_indvars, dict_depvars, dict_depvar_input = get_vars(
+        pde_system.indvars, pde_system.depvars)
 
     if init_params === nothing
         # Use the initialization of the neural network framework
@@ -398,70 +355,41 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem,
         # This is done because Float64 is almost always better for these applications
         if chain isa AbstractArray
             x = map(chain) do x
-                _x = ComponentArrays.ComponentArray(Lux.initialparameters(
-                    Random.default_rng(),
-                    x))
-                Float64.(_x) # No ComponentArray GPU support
+                ComponentArray{Float64}(LuxCore.initialparameters(Random.default_rng(), x))
             end
             names = ntuple(i -> depvars[i], length(chain))
-            init_params = ComponentArrays.ComponentArray(NamedTuple{names}(i
-            for i in x))
+            init_params = ComponentArray(NamedTuple{names}(Tuple(x)))
         else
-            init_params = Float64.(ComponentArrays.ComponentArray(Lux.initialparameters(
-                Random.default_rng(),
-                chain)))
+            init_params = ComponentArray{Float64}(LuxCore.initialparameters(
+                Random.default_rng(), chain))
         end
-    else
-        init_params = init_params
     end
 
-    flat_init_params = if init_params isa ComponentArrays.ComponentArray
+    flat_init_params = if init_params isa ComponentArray
         init_params
     elseif multioutput
         @assert length(init_params) == length(depvars)
         names = ntuple(i -> depvars[i], length(init_params))
-        x = ComponentArrays.ComponentArray(NamedTuple{names}(i for i in init_params))
+        x = ComponentArray(NamedTuple{names}(Tuple(init_params)))
     else
-        ComponentArrays.ComponentArray(init_params)
+        ComponentArray(init_params)
     end
 
-    flat_init_params = if param_estim == false && multioutput
-        ComponentArrays.ComponentArray(; depvar = flat_init_params)
-    elseif param_estim == false && !multioutput
-        flat_init_params
+    flat_init_params = if !param_estim
+        multioutput ? ComponentArray(; depvar = flat_init_params) : flat_init_params
     else
-        ComponentArrays.ComponentArray(; depvar = flat_init_params, p = default_p)
+        ComponentArray(; depvar = flat_init_params, p = default_p)
     end
 
-    eltypeθ = eltype(flat_init_params)
-
-    if adaloss === nothing
-        adaloss = NonAdaptiveLoss{eltypeθ}()
+    if length(flat_init_params) == 0 && !Base.isconcretetype(eltype(flat_init_params))
+        flat_init_params = ComponentArray(
+            convert(AbstractArray{Float64}, getdata(flat_init_params)),
+            getaxes(flat_init_params))
     end
 
-    phi = discretization.phi
+    adaloss === nothing && (adaloss = NonAdaptiveLoss{eltype(flat_init_params)}())
 
-    if (phi isa Vector && phi[1].f isa Lux.AbstractExplicitLayer)
-        for ϕ in phi
-            ϕ.st = adapt(parameterless_type(ComponentArrays.getdata(flat_init_params)),
-                ϕ.st)
-        end
-    elseif (!(phi isa Vector) && phi.f isa Lux.AbstractExplicitLayer)
-        phi.st = adapt(parameterless_type(ComponentArrays.getdata(flat_init_params)),
-            phi.st)
-    end
-
-    derivative = discretization.derivative
-    strategy = discretization.strategy
-
-    logger = discretization.logger
-    log_frequency = discretization.log_options.log_frequency
-    iteration = discretization.iteration
-    self_increment = discretization.self_increment
-
-    if !(eqs isa Array)
-        eqs = [eqs]
-    end
+    eqs isa Array || (eqs = [eqs])
 
     pde_indvars = if strategy isa QuadratureTraining
         get_argument(eqs, dict_indvars, dict_depvars)
@@ -478,7 +406,7 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem,
     pde_integration_vars = get_integration_variables(eqs, dict_indvars, dict_depvars)
     bc_integration_vars = get_integration_variables(bcs, dict_indvars, dict_depvars)
 
-    pinnrep = PINNRepresentation(eqs, bcs, domains, eq_params, defaults, default_p,
+    pinnrep = PINNRepresentation(eqs, bcs, domain, eq_params, defaults, default_p,
         param_estim, additional_loss, adaloss, depvars, indvars,
         dict_indvars, dict_depvars, dict_depvar_input, logger,
         multioutput, iteration, init_params, flat_init_params, phi,
@@ -503,24 +431,19 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem,
     pinnrep.symbolic_bc_loss_functions = symbolic_bc_loss_functions
 
     datafree_pde_loss_functions = [build_loss_function(pinnrep, eq, pde_indvar)
-                                   for (eq, pde_indvar, integration_indvar) in zip(eqs,
-        pde_indvars,
-        pde_integration_vars)]
+                                   for (eq, pde_indvar) in zip(eqs, pde_indvars)]
 
     datafree_bc_loss_functions = [build_loss_function(pinnrep, bc, bc_indvar)
-                                  for (bc, bc_indvar, integration_indvar) in zip(bcs,
-        bc_indvars,
-        bc_integration_vars)]
+                                  for (bc, bc_indvar) in zip(bcs, bc_indvars)]
 
     pde_loss_functions, bc_loss_functions = merge_strategy_with_loss_function(pinnrep,
-        strategy,
-        datafree_pde_loss_functions,
-        datafree_bc_loss_functions)
+        strategy, datafree_pde_loss_functions, datafree_bc_loss_functions)
+
     # setup for all adaptive losses
     num_pde_losses = length(pde_loss_functions)
     num_bc_losses = length(bc_loss_functions)
     # assume one single additional loss function if there is one. this means that the user needs to lump all their functions into a single one,
-    num_additional_loss = additional_loss isa Nothing ? 0 : 1
+    num_additional_loss = convert(Int, additional_loss !== nothing)
 
     adaloss_T = eltype(adaloss.pde_loss_weights)
 
@@ -531,10 +454,9 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem,
                                       adaloss.additional_loss_weights
 
     reweight_losses_func = generate_adaptive_loss_function(pinnrep, adaloss,
-        pde_loss_functions,
-        bc_loss_functions)
+        pde_loss_functions, bc_loss_functions)
 
-    function get_likelihood_estimate_function(discretization::PhysicsInformedNN)
+    function get_likelihood_estimate_function(::PhysicsInformedNN)
         function full_loss_function(θ, p)
             # the aggregation happens on cpu even if the losses are gpu, probably fine since it's only a few of them
             pde_losses = [pde_loss_function(θ) for pde_loss_function in pde_loss_functions]
@@ -542,13 +464,12 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem,
 
             # this is kind of a hack, and means that whenever the outer function is evaluated the increment goes up, even if it's not being optimized
             # that's why we prefer the user to maintain the increment in the outer loop callback during optimization
-            ChainRulesCore.@ignore_derivatives if self_increment
-                iteration[1] += 1
+            @ignore_derivatives if self_increment
+                iteration[] += 1
             end
 
-            ChainRulesCore.@ignore_derivatives begin
-                reweight_losses_func(θ, pde_losses,
-                    bc_losses)
+            @ignore_derivatives begin
+                reweight_losses_func(θ, pde_losses, bc_losses)
             end
 
             weighted_pde_losses = adaloss.pde_loss_weights .* pde_losses
@@ -562,50 +483,37 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem,
             full_weighted_loss = if additional_loss isa Nothing
                 weighted_loss_before_additional
             else
-                function _additional_loss(phi, θ)
-                    (θ_, p_) = if (param_estim == true)
-                        θ.depvar, θ.p
-                    else
-                        θ, nothing
-                    end
-                    return additional_loss(phi, θ_, p_)
-                end
+                (θ_, p_) = param_estim ? (θ.depvar, θ.p) : (θ, nothing)
+                _additional_loss = additional_loss(phi, θ_, p_)
                 weighted_additional_loss_val = adaloss.additional_loss_weights[1] *
-                                               _additional_loss(phi, θ)
+                                               _additional_loss
                 weighted_loss_before_additional + weighted_additional_loss_val
             end
 
-            ChainRulesCore.@ignore_derivatives begin
-                if iteration[1] % log_frequency == 0
+            @ignore_derivatives begin
+                if iteration[] % log_frequency == 0
                     logvector(pinnrep.logger, pde_losses, "unweighted_loss/pde_losses",
-                        iteration[1])
-                    logvector(pinnrep.logger,
-                        bc_losses,
-                        "unweighted_loss/bc_losses",
-                        iteration[1])
+                        iteration[])
+                    logvector(pinnrep.logger, bc_losses, "unweighted_loss/bc_losses",
+                        iteration[])
                     logvector(pinnrep.logger, weighted_pde_losses,
-                        "weighted_loss/weighted_pde_losses",
-                        iteration[1])
+                        "weighted_loss/weighted_pde_losses", iteration[])
                     logvector(pinnrep.logger, weighted_bc_losses,
-                        "weighted_loss/weighted_bc_losses",
-                        iteration[1])
-                    if !(additional_loss isa Nothing)
+                        "weighted_loss/weighted_bc_losses", iteration[])
+                    if additional_loss !== nothing
                         logscalar(pinnrep.logger, weighted_additional_loss_val,
-                            "weighted_loss/weighted_additional_loss", iteration[1])
+                            "weighted_loss/weighted_additional_loss", iteration[])
                     end
                     logscalar(pinnrep.logger, sum_weighted_pde_losses,
-                        "weighted_loss/sum_weighted_pde_losses", iteration[1])
+                        "weighted_loss/sum_weighted_pde_losses", iteration[])
                     logscalar(pinnrep.logger, sum_weighted_bc_losses,
-                        "weighted_loss/sum_weighted_bc_losses", iteration[1])
+                        "weighted_loss/sum_weighted_bc_losses", iteration[])
                     logscalar(pinnrep.logger, full_weighted_loss,
-                        "weighted_loss/full_weighted_loss",
-                        iteration[1])
+                        "weighted_loss/full_weighted_loss", iteration[])
                     logvector(pinnrep.logger, adaloss.pde_loss_weights,
-                        "adaptive_loss/pde_loss_weights",
-                        iteration[1])
+                        "adaptive_loss/pde_loss_weights", iteration[])
                     logvector(pinnrep.logger, adaloss.bc_loss_weights,
-                        "adaptive_loss/bc_loss_weights",
-                        iteration[1])
+                        "adaptive_loss/bc_loss_weights", iteration[])
                 end
             end
 
@@ -621,14 +529,13 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem,
         # required as Physics loss also needed on the discrete dataset domain points
         # data points are discrete and so by default GridTraining loss applies
         # passing placeholder dx with GridTraining, it uses data points irl
-        datapde_loss_functions, databc_loss_functions = if (!(dataset_bc isa Nothing) ||
-                                                            !(dataset_pde isa Nothing))
-            merge_strategy_with_loglikelihood_function(pinnrep,
-                GridTraining(0.1),
-                datafree_pde_loss_functions,
-                datafree_bc_loss_functions, train_sets_pde = dataset_pde, train_sets_bc = dataset_bc)
+        datapde_loss_functions, databc_loss_functions = if dataset_bc !== nothing ||
+                                                           dataset_pde !== nothing
+            merge_strategy_with_loglikelihood_function(pinnrep, GridTraining(0.1),
+                datafree_pde_loss_functions, datafree_bc_loss_functions,
+                train_sets_pde = dataset_pde, train_sets_bc = dataset_bc)
         else
-            (nothing, nothing)
+            nothing, nothing
         end
 
         function full_loss_function(θ, allstd::Vector{Vector{Float64}})
@@ -652,11 +559,11 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem,
 
             # this is kind of a hack, and means that whenever the outer function is evaluated the increment goes up, even if it's not being optimized
             # that's why we prefer the user to maintain the increment in the outer loop callback during optimization
-            ChainRulesCore.@ignore_derivatives if self_increment
-                iteration[1] += 1
+            @ignore_derivatives if self_increment
+                iteration[] += 1
             end
 
-            ChainRulesCore.@ignore_derivatives begin
+            @ignore_derivatives begin
                 reweight_losses_func(θ, pde_loglikelihoods,
                     bc_loglikelihoods)
             end
@@ -672,17 +579,9 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem,
             full_weighted_loglikelihood = if additional_loss isa Nothing
                 weighted_loglikelihood_before_additional
             else
-                function _additional_loss(phi, θ)
-                    (θ_, p_) = if (param_estim == true)
-                        θ.depvar, θ.p
-                    else
-                        θ, nothing
-                    end
-                    return additional_loss(phi, θ_, p_)
-                end
-
-                _additional_loglikelihood = logpdf(Normal(0, stdextra),
-                    _additional_loss(phi, θ))
+                (θ_, p_) = param_estim ? (θ.depvar, θ.p) : (θ, nothing)
+                _additional_loss = additional_loss(phi, θ_, p_)
+                _additional_loglikelihood = logpdf(Normal(0, stdextra), _additional_loss)
 
                 weighted_additional_loglikelihood = adaloss.additional_loss_weights[1] *
                                                     _additional_loglikelihood
@@ -698,8 +597,7 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem,
 
     full_loss_function = get_likelihood_estimate_function(discretization)
     pinnrep.loss_functions = PINNLossFunctions(bc_loss_functions, pde_loss_functions,
-        full_loss_function, additional_loss,
-        datafree_pde_loss_functions,
+        full_loss_function, additional_loss, datafree_pde_loss_functions,
         datafree_bc_loss_functions)
 
     return pinnrep
@@ -709,12 +607,11 @@ end
     prob = discretize(pde_system::PDESystem, discretization::PhysicsInformedNN)
 
 Transforms a symbolic description of a ModelingToolkit-defined `PDESystem` and generates
-an `OptimizationProblem` for [Optimization.jl](https://docs.sciml.ai/Optimization/stable/) whose
-solution is the solution to the PDE.
+an `OptimizationProblem` for [Optimization.jl](https://docs.sciml.ai/Optimization/stable/)
+whose solution is the solution to the PDE.
 """
 function SciMLBase.discretize(pde_system::PDESystem, discretization::PhysicsInformedNN)
     pinnrep = symbolic_discretize(pde_system, discretization)
-    f = OptimizationFunction(pinnrep.loss_functions.full_loss_function,
-        Optimization.AutoZygote())
-    Optimization.OptimizationProblem(f, pinnrep.flat_init_params)
+    f = OptimizationFunction(pinnrep.loss_functions.full_loss_function, AutoZygote())
+    return Optimization.OptimizationProblem(f, pinnrep.flat_init_params)
 end
diff --git a/src/eltype_matching.jl b/src/eltype_matching.jl
new file mode 100644
index 0000000000..d0d25be885
--- /dev/null
+++ b/src/eltype_matching.jl
@@ -0,0 +1,14 @@
+struct EltypeAdaptor{T} end
+
+(l::EltypeAdaptor)(x) = fmap(Adapt.adapt(l), x)
+function (l::EltypeAdaptor)(x::AbstractArray{T}) where {T}
+    return (isbitstype(T) || T <: Number) ? Adapt.adapt(l, x) : map(l, x)
+end
+
+function Adapt.adapt_storage(::EltypeAdaptor{T}, x::AbstractArray) where {T}
+    return convert(AbstractArray{T}, x)
+end
+
+function Adapt.adapt_storage(::EltypeAdaptor{T}, x::AbstractArray{<:Complex}) where {T}
+    return convert(AbstractArray{Complex{T}}, x)
+end
diff --git a/src/neural_adapter.jl b/src/neural_adapter.jl
index e54c6e8186..fffd69749b 100644
--- a/src/neural_adapter.jl
+++ b/src/neural_adapter.jl
@@ -1,103 +1,54 @@
 function generate_training_sets(domains, dx, eqs, eltypeθ)
-    if dx isa Array
-        dxs = dx
-    else
-        dxs = fill(dx, length(domains))
-    end
+    dxs = dx isa Array ? dx : fill(dx, length(domains))
     spans = [infimum(d.domain):dx:supremum(d.domain) for (d, dx) in zip(domains, dxs)]
-    train_set = adapt(eltypeθ,
-        hcat(vec(map(points -> collect(points), Iterators.product(spans...)))...))
+    return reduce(hcat, vec(map(collect, Iterators.product(spans...)))) |>
+           EltypeAdaptor{eltypeθ}()
 end
 
-function get_loss_function_(loss, init_params, pde_system, strategy::GridTraining)
-    eqs = pde_system.eqs
-    if !(eqs isa Array)
-        eqs = [eqs]
-    end
-    domains = pde_system.domain
-    depvars, indvars, dict_indvars, dict_depvars = get_vars(pde_system.indvars,
-        pde_system.depvars)
-    eltypeθ = eltype(init_params)
-    dx = strategy.dx
-    train_set = generate_training_sets(domains, dx, eqs, eltypeθ)
-    get_loss_function(loss, train_set, eltypeθ, strategy)
-end
-
-function get_bounds_(domains, eqs, eltypeθ, dict_indvars, dict_depvars, strategy)
+function get_bounds_(domains, eqs, eltypeθ, dict_indvars, dict_depvars, _)
     dict_span = Dict([Symbol(d.variables) => [infimum(d.domain), supremum(d.domain)]
                       for d in domains])
     args = get_argument(eqs, dict_indvars, dict_depvars)
 
     bounds = first(map(args) do pd
-        span = map(p -> get(dict_span, p, p), pd)
-        map(s -> adapt(eltypeθ, s), span)
+        return get.((dict_span,), pd, pd) |> EltypeAdaptor{eltypeθ}()
     end)
-    bounds = [getindex.(bounds, 1), getindex.(bounds, 2)]
-    return bounds
+    return first.(bounds), last.(bounds)
 end
 
-function get_loss_function_(loss, init_params, pde_system, strategy::StochasticTraining)
+function get_loss_function_neural_adapter(
+        loss, init_params, pde_system, strategy::GridTraining)
     eqs = pde_system.eqs
-    if !(eqs isa Array)
-        eqs = [eqs]
-    end
-    domains = pde_system.domain
-
-    depvars, indvars, dict_indvars, dict_depvars = get_vars(pde_system.indvars,
-        pde_system.depvars)
-
-    eltypeθ = eltype(init_params)
-    bound = get_bounds_(domains, eqs, eltypeθ, dict_indvars, dict_depvars, strategy)
-    get_loss_function(loss, bound, eltypeθ, strategy)
+    eqs isa Array || (eqs = [eqs])
+    eltypeθ = recursive_eltype(init_params)
+    train_set = generate_training_sets(pde_system.domain, strategy.dx, eqs, eltypeθ)
+    return get_loss_function(init_params, loss, train_set, eltypeθ, strategy)
 end
 
-function get_loss_function_(loss, init_params, pde_system, strategy::QuasiRandomTraining)
+function get_loss_function_neural_adapter(loss, init_params, pde_system,
+        strategy::Union{StochasticTraining, QuasiRandomTraining})
     eqs = pde_system.eqs
-    if !(eqs isa Array)
-        eqs = [eqs]
-    end
+    eqs isa Array || (eqs = [eqs])
     domains = pde_system.domain
 
-    depvars, indvars, dict_indvars, dict_depvars = get_vars(pde_system.indvars,
-        pde_system.depvars)
+    _, _, dict_indvars, dict_depvars = get_vars(pde_system.indvars, pde_system.depvars)
 
-    eltypeθ = eltype(init_params)
+    eltypeθ = recursive_eltype(init_params)
     bound = get_bounds_(domains, eqs, eltypeθ, dict_indvars, dict_depvars, strategy)
-    get_loss_function(loss, bound, eltypeθ, strategy)
+    return get_loss_function(init_params, loss, bound, eltypeθ, strategy)
 end
 
-function get_bounds_(domains, eqs, eltypeθ, dict_indvars, dict_depvars,
-        strategy::QuadratureTraining)
-    dict_lower_bound = Dict([Symbol(d.variables) => infimum(d.domain) for d in domains])
-    dict_upper_bound = Dict([Symbol(d.variables) => supremum(d.domain) for d in domains])
-
-    args = get_argument(eqs, dict_indvars, dict_depvars)
-
-    lower_bounds = map(args) do pd
-        span = map(p -> get(dict_lower_bound, p, p), pd)
-        map(s -> adapt(eltypeθ, s), span)
-    end
-    upper_bounds = map(args) do pd
-        span = map(p -> get(dict_upper_bound, p, p), pd)
-        map(s -> adapt(eltypeθ, s), span)
-    end
-    bound = lower_bounds, upper_bounds
-end
-
-function get_loss_function_(loss, init_params, pde_system, strategy::QuadratureTraining)
+function get_loss_function_neural_adapter(
+        loss, init_params, pde_system, strategy::QuadratureTraining)
     eqs = pde_system.eqs
-    if !(eqs isa Array)
-        eqs = [eqs]
-    end
+    eqs isa Array || (eqs = [eqs])
     domains = pde_system.domain
 
-    depvars, indvars, dict_indvars, dict_depvars = get_vars(pde_system.indvars,
-        pde_system.depvars)
+    _, _, dict_indvars, dict_depvars = get_vars(pde_system.indvars, pde_system.depvars)
 
-    eltypeθ = eltype(init_params)
-    bound = get_bounds_(domains, eqs, eltypeθ, dict_indvars, dict_depvars, strategy)
-    lb, ub = bound
-    get_loss_function(loss, lb[1], ub[1], eltypeθ, strategy)
+    eltypeθ = recursive_eltype(init_params)
+    lb, ub = get_bounds_(domains, eqs, eltypeθ, dict_indvars, dict_depvars, strategy)
+    return get_loss_function(init_params, loss, lb, ub, eltypeθ, strategy)
 end
 
 """
@@ -115,24 +66,17 @@ Trains a neural network using the results from one already obtained prediction.
 function neural_adapter end
 
 function neural_adapter(loss, init_params, pde_system, strategy)
-    loss_function__ = get_loss_function_(loss, init_params, pde_system, strategy)
-
-    function loss_function_(θ, p)
-        loss_function__(θ)
-    end
-    f_ = OptimizationFunction(loss_function_, Optimization.AutoZygote())
-    prob = Optimization.OptimizationProblem(f_, init_params)
+    loss_function = get_loss_function_neural_adapter(
+        loss, init_params, pde_system, strategy)
+    return OptimizationProblem(
+        OptimizationFunction((θ, _) -> loss_function(θ), AutoZygote()), init_params)
 end
 
 function neural_adapter(losses::Array, init_params, pde_systems::Array, strategy)
-    loss_functions_ = map(zip(losses, pde_systems)) do (l, p)
-        get_loss_function_(l, init_params, p, strategy)
-    end
-    loss_function__ = θ -> sum(map(l -> l(θ), loss_functions_))
-    function loss_function_(θ, p)
-        loss_function__(θ)
+    loss_functions = map(zip(losses, pde_systems)) do (l, p)
+        get_loss_function_neural_adapter(l, init_params, p, strategy)
     end
-
-    f_ = OptimizationFunction(loss_function_, Optimization.AutoZygote())
-    prob = Optimization.OptimizationProblem(f_, init_params)
+    return OptimizationProblem(
+        OptimizationFunction((θ, _) -> sum(l -> l(θ), loss_functions), AutoZygote()),
+        init_params)
 end
diff --git a/src/ode_solve.jl b/src/ode_solve.jl
index bcf9c68ebe..fe6a770cd4 100644
--- a/src/ode_solve.jl
+++ b/src/ode_solve.jl
@@ -1,12 +1,14 @@
 abstract type NeuralPDEAlgorithm <: SciMLBase.AbstractODEAlgorithm end
 
 """
-    NNODE(chain, opt, init_params = nothing; autodiff = false, batch = 0, additional_loss = nothing, kwargs...)
+    NNODE(chain, opt, init_params = nothing; autodiff = false, batch = 0,
+          additional_loss = nothing, kwargs...)
 
-Algorithm for solving ordinary differential equations using a neural network. This is a specialization
-of the physics-informed neural network which is used as a solver for a standard `ODEProblem`.
+Algorithm for solving ordinary differential equations using a neural network. This is a
+specialization of the physics-informed neural network which is used as a solver for a
+standard `ODEProblem`.
 
-!!! warn
+!!! warning
 
     Note that NNODE only supports ODEs which are written in the out-of-place form, i.e.
     `du = f(u,p,t)`, and not `f(du,u,p,t)`. If not declared out-of-place, then the NNODE
@@ -14,24 +16,31 @@ of the physics-informed neural network which is used as a solver for a standard
 
 ## Positional Arguments
 
-* `chain`: A neural network architecture, defined as a `Lux.AbstractExplicitLayer` or `Flux.Chain`.
-          `Flux.Chain` will be converted to `Lux` using `adapt(FromFluxAdaptor(false, false), chain)`.
+* `chain`: A neural network architecture, defined as a `Lux.AbstractLuxLayer` or
+           `Flux.Chain`. `Flux.Chain` will be converted to `Lux` using
+           `adapt(FromFluxAdaptor(), chain)`.
 * `opt`: The optimizer to train the neural network.
 * `init_params`: The initial parameter of the neural network. By default, this is `nothing`
-                 which thus uses the random initialization provided by the neural network library.
+                 which thus uses the random initialization provided by the neural network
+                 library.
 
 ## Keyword Arguments
-* `additional_loss`: A function additional_loss(phi, θ) where phi are the neural network trial solutions,
-                     θ are the weights of the neural network(s).
+
+* `additional_loss`: A function additional_loss(phi, θ) where phi are the neural network
+                     trial solutions, θ are the weights of the neural network(s).
 * `autodiff`: The switch between automatic and numerical differentiation for
               the PDE operators. The reverse mode of the loss function is always
               automatic differentiation (via Zygote), this is only for the derivative
               in the loss function (the derivative with respect to time).
-* `batch`: The batch size for the loss computation. Defaults to `true`, means the neural network is applied at a row vector of values
-           `t` simultaneously, i.e. it's the batch size for the neural network evaluations. This requires a neural network compatible with batched data.
-           `false` means which means the application of the neural network is done at individual time points one at a time.
-           This is not applicable to `QuadratureTraining` where `batch` is passed in the `strategy` which is the number of points it can parallelly compute the integrand.
-* `param_estim`: Boolean to indicate whether parameters of the differential equations are learnt along with parameters of the neural network.
+* `batch`: The batch size for the loss computation. Defaults to `true`, means the neural
+           network is applied at a row vector of values `t` simultaneously, i.e. it's the
+           batch size for the neural network evaluations. This requires a neural network
+           compatible with batched data. `false` means which means the application of the
+           neural network is done at individual time points one at a time. This is not
+           applicable to `QuadratureTraining` where `batch` is passed in the `strategy`
+           which is the number of points it can parallelly compute the integrand.
+* `param_estim`: Boolean to indicate whether parameters of the differential equations are
+                 learnt along with parameters of the neural network.
 * `strategy`: The training strategy used to choose the points for the evaluations.
               Default of `nothing` means that `QuadratureTraining` with QuadGK is used if no
               `dt` is given, and `GridTraining` is used with `dt` if given.
@@ -61,94 +70,81 @@ sol = solve(prob, NNODE(chain, opt), verbose = true, abstol = 1e-10, maxiters =
 
 ## Solution Notes
 
-Note that the solution is evaluated at fixed time points according to standard output handlers
-such as `saveat` and `dt`. However, the neural network is a fully continuous solution so `sol(t)`
-is an accurate interpolation (up to the neural network training result). In addition, the
-`OptimizationSolution` is returned as `sol.k` for further analysis.
+Note that the solution is evaluated at fixed time points according to standard output
+handlers such as `saveat` and `dt`. However, the neural network is a fully continuous
+solution so `sol(t)` is an accurate interpolation (up to the neural network training
+result). In addition, the `OptimizationSolution` is returned as `sol.k` for further
+analysis.
 
 ## References
 
-Lagaris, Isaac E., Aristidis Likas, and Dimitrios I. Fotiadis. "Artificial neural networks for solving
-ordinary and partial differential equations." IEEE Transactions on Neural Networks 9, no. 5 (1998): 987-1000.
+Lagaris, Isaac E., Aristidis Likas, and Dimitrios I. Fotiadis. "Artificial neural networks
+for solving ordinary and partial differential equations." IEEE Transactions on Neural
+Networks 9, no. 5 (1998): 987-1000.
 """
-struct NNODE{C, O, P, B, PE, K, AL <: Union{Nothing, Function},
-    S <: Union{Nothing, AbstractTrainingStrategy}
-} <:
-       NeuralPDEAlgorithm
-    chain::C
-    opt::O
-    init_params::P
+@concrete struct NNODE
+    chain <: AbstractLuxLayer
+    opt
+    init_params
     autodiff::Bool
-    batch::B
-    strategy::S
-    param_estim::PE
-    additional_loss::AL
-    kwargs::K
+    batch
+    strategy <: Union{Nothing, AbstractTrainingStrategy}
+    param_estim
+    additional_loss <: Union{Nothing, Function}
+    kwargs
 end
-function NNODE(chain, opt, init_params = nothing;
-        strategy = nothing,
-        autodiff = false, batch = true, param_estim = false, additional_loss = nothing, kwargs...)
-    !(chain isa Lux.AbstractExplicitLayer) &&
-        (chain = adapt(FromFluxAdaptor(false, false), chain))
-    NNODE(chain, opt, init_params, autodiff, batch,
+
+function NNODE(chain, opt, init_params = nothing; strategy = nothing, autodiff = false,
+        batch = true, param_estim = false, additional_loss = nothing, kwargs...)
+    chain isa AbstractLuxLayer || (chain = FromFluxAdaptor()(chain))
+    return NNODE(chain, opt, init_params, autodiff, batch,
         strategy, param_estim, additional_loss, kwargs)
 end
 
 """
-    ODEPhi(chain::Lux.AbstractExplicitLayer, t, u0, st)
+    ODEPhi(chain::Lux.AbstractLuxLayer, t, u0, st)
 
-Internal struct, used for representing the ODE solution as a neural network in a form that respects boundary conditions, i.e.
-`phi(t) = u0 + t*NN(t)`.
+Internal struct, used for representing the ODE solution as a neural network in a form that
+respects boundary conditions, i.e. `phi(t) = u0 + t*NN(t)`.
 """
-mutable struct ODEPhi{C, T, U, S}
-    chain::C
-    t0::T
-    u0::U
-    st::S
-    function ODEPhi(chain::Lux.AbstractExplicitLayer, t::Number, u0, st)
-        new{typeof(chain), typeof(t), typeof(u0), typeof(st)}(chain, t, u0, st)
-    end
+@concrete struct ODEPhi
+    u0
+    t0
+    smodel <: StatefulLuxLayer
+end
+
+function ODEPhi(model::AbstractLuxLayer, t0::Number, u0, st)
+    return ODEPhi(u0, t0, StatefulLuxLayer{true}(model, nothing, st))
 end
 
-function generate_phi_θ(chain::Lux.AbstractExplicitLayer, t, u0, init_params)
-    θ, st = Lux.setup(Random.default_rng(), chain)
-    isnothing(init_params) && (init_params = θ)
-    ODEPhi(chain, t, u0, st), init_params
+function generate_phi_θ(chain::AbstractLuxLayer, t, u0, ::Nothing)
+    θ, st = LuxCore.setup(Random.default_rng(), chain)
+    return ODEPhi(chain, t, u0, st), θ
 end
 
-function (f::ODEPhi{C, T, U})(t::Number,
-        θ) where {C <: Lux.AbstractExplicitLayer, T, U <: Number}
-    y, st = f.chain(
-        adapt(parameterless_type(ComponentArrays.getdata(θ.depvar)), [t]), θ.depvar, f.st)
-    ChainRulesCore.@ignore_derivatives f.st = st
-    f.u0 + (t - f.t0) * first(y)
+function generate_phi_θ(chain::AbstractLuxLayer, t, u0, init_params)
+    st = LuxCore.initialstates(Random.default_rng(), chain)
+    return ODEPhi(chain, t, u0, st), init_params
 end
 
-function (f::ODEPhi{C, T, U})(t::AbstractVector,
-        θ) where {C <: Lux.AbstractExplicitLayer, T, U <: Number}
-    # Batch via data as row vectors
-    y, st = f.chain(
-        adapt(parameterless_type(ComponentArrays.getdata(θ.depvar)), t'), θ.depvar, f.st)
-    ChainRulesCore.@ignore_derivatives f.st = st
-    f.u0 .+ (t' .- f.t0) .* y
+function (f::ODEPhi)(t, θ)
+    dev = safe_get_device(θ)
+    return f(dev, safe_expand(dev, t), θ)
 end
 
-function (f::ODEPhi{C, T, U})(t::Number, θ) where {C <: Lux.AbstractExplicitLayer, T, U}
-    y, st = f.chain(
-        adapt(parameterless_type(ComponentArrays.getdata(θ.depvar)), [t]), θ.depvar, f.st)
-    ChainRulesCore.@ignore_derivatives f.st = st
-    f.u0 .+ (t .- f.t0) .* y
+function (f::ODEPhi{<:Number})(dev, t::Number, θ)
+    res = only(cdev(f.smodel(dev([t]), θ.depvar)))
+    return f.u0 + (t - f.t0) * res
 end
 
-function (f::ODEPhi{C, T, U})(t::AbstractVector,
-        θ) where {C <: Lux.AbstractExplicitLayer, T, U}
-    # Batch via data as row vectors
-    y, st = f.chain(
-        adapt(parameterless_type(ComponentArrays.getdata(θ.depvar)), t'), θ.depvar, f.st)
-    ChainRulesCore.@ignore_derivatives f.st = st
-    f.u0 .+ (t' .- f.t0) .* y
+function (f::ODEPhi{<:Number})(_, t::AbstractVector, θ)
+    return f.u0 .+ (t' .- f.t0) .* f.smodel(t', θ.depvar)
 end
 
+(f::ODEPhi)(dev, t::Number, θ) = dev(f.u0) .+ (t .- f.t0) .* f.smodel(dev([t]), θ.depvar)
+
+(f::ODEPhi)(dev, t::AbstractVector, θ) = dev(f.u0) .+ (t' .- f.t0) .* f.smodel(t', θ.depvar)
+
 """
     ode_dfdx(phi, t, θ, autodiff)
 
@@ -156,30 +152,16 @@ Computes u' using either forward-mode automatic differentiation or numerical dif
 """
 function ode_dfdx end
 
-function ode_dfdx(phi::ODEPhi{C, T, U}, t::Number, θ,
-        autodiff::Bool) where {C, T, U <: Number}
-    if autodiff
-        ForwardDiff.derivative(t -> phi(t, θ), t)
-    else
-        (phi(t + sqrt(eps(typeof(t))), θ) - phi(t, θ)) / sqrt(eps(typeof(t)))
-    end
-end
-
-function ode_dfdx(phi::ODEPhi{C, T, U}, t::Number, θ,
-        autodiff::Bool) where {C, T, U <: AbstractVector}
-    if autodiff
-        ForwardDiff.jacobian(t -> phi(t, θ), t)
-    else
-        (phi(t + sqrt(eps(typeof(t))), θ) - phi(t, θ)) / sqrt(eps(typeof(t)))
-    end
+function ode_dfdx(phi::ODEPhi{<:Number}, t::Number, θ, autodiff::Bool)
+    autodiff && return ForwardDiff.derivative(Base.Fix2(phi, θ), t)
+    ϵ = sqrt(eps(typeof(t)))
+    return (phi(t + ϵ, θ) - phi(t, θ)) / ϵ
 end
 
-function ode_dfdx(phi::ODEPhi, t::AbstractVector, θ, autodiff::Bool)
-    if autodiff
-        ForwardDiff.jacobian(t -> phi(t, θ), t)
-    else
-        (phi(t .+ sqrt(eps(eltype(t))), θ) - phi(t, θ)) ./ sqrt(eps(eltype(t)))
-    end
+function ode_dfdx(phi::ODEPhi, t, θ, autodiff::Bool)
+    autodiff && return ForwardDiff.jacobian(Base.Fix2(phi, θ), t)
+    ϵ = sqrt(eps(eltype(t)))
+    return (phi(t .+ ϵ, θ) .- phi(t, θ)) ./ ϵ
 end
 
 """
@@ -189,35 +171,22 @@ Simple L2 inner loss at a time `t` with parameters `θ` of the neural network.
 """
 function inner_loss end
 
-function inner_loss(phi::ODEPhi{C, T, U}, f, autodiff::Bool, t::Number, θ,
-        p, param_estim::Bool) where {C, T, U <: Number}
+function inner_loss(phi::ODEPhi, f, autodiff::Bool, t::Number, θ, p, param_estim::Bool)
     p_ = param_estim ? θ.p : p
-    sum(abs2, ode_dfdx(phi, t, θ, autodiff) - f(phi(t, θ), p_, t))
+    return sum(abs2, ode_dfdx(phi, t, θ, autodiff) .- f(phi(t, θ), p_, t))
 end
 
-function inner_loss(phi::ODEPhi{C, T, U}, f, autodiff::Bool, t::AbstractVector, θ,
-        p, param_estim::Bool) where {C, T, U <: Number}
+function inner_loss(
+        phi::ODEPhi, f, autodiff::Bool, t::AbstractVector, θ, p, param_estim::Bool)
     p_ = param_estim ? θ.p : p
     out = phi(t, θ)
-    fs = reduce(hcat, [f(out[i], p_, t[i]) for i in axes(out, 2)])
-    dxdtguess = Array(ode_dfdx(phi, t, θ, autodiff))
-    sum(abs2, dxdtguess .- fs) / length(t)
-end
-
-function inner_loss(phi::ODEPhi{C, T, U}, f, autodiff::Bool, t::Number, θ,
-        p, param_estim::Bool) where {C, T, U}
-    p_ = param_estim ? θ.p : p
-    sum(abs2, ode_dfdx(phi, t, θ, autodiff) .- f(phi(t, θ), p_, t))
-end
-
-function inner_loss(phi::ODEPhi{C, T, U}, f, autodiff::Bool, t::AbstractVector, θ,
-        p, param_estim::Bool) where {C, T, U}
-    p_ = param_estim ? θ.p : p
-    out = Array(phi(t, θ))
-    arrt = Array(t)
-    fs = reduce(hcat, [f(out[:, i], p_, arrt[i]) for i in 1:size(out, 2)])
-    dxdtguess = Array(ode_dfdx(phi, t, θ, autodiff))
-    sum(abs2, dxdtguess .- fs) / length(t)
+    fs = if phi.u0 isa Number
+        reduce(hcat, [f(out[i], p_, tᵢ) for (i, tᵢ) in enumerate(t)])
+    else
+        reduce(hcat, [f(out[:, i], p_, tᵢ) for (i, tᵢ) in enumerate(t)])
+    end
+    dxdtguess = ode_dfdx(phi, t, θ, autodiff)
+    return sum(abs2, fs .- dxdtguess) / length(t)
 end
 
 """
@@ -230,16 +199,17 @@ function generate_loss(strategy::QuadratureTraining, phi, f, autodiff::Bool, tsp
     integrand(t::Number, θ) = abs2(inner_loss(phi, f, autodiff, t, θ, p, param_estim))
 
     function integrand(ts, θ)
-        [abs2(inner_loss(phi, f, autodiff, t, θ, p, param_estim)) for t in ts]
+        return [abs2(inner_loss(phi, f, autodiff, t, θ, p, param_estim)) for t in ts]
     end
 
     function loss(θ, _)
         intf = BatchIntegralFunction(integrand, max_batch = strategy.batch)
         intprob = IntegralProblem(intf, (tspan[1], tspan[2]), θ)
-        sol = solve(intprob, strategy.quadrature_alg; abstol = strategy.abstol,
-            reltol = strategy.reltol, maxiters = strategy.maxiters)
-        sol.u
+        sol = solve(intprob, strategy.quadrature_alg; strategy.abstol,
+            strategy.reltol, strategy.maxiters)
+        return sol.u
     end
+
     return loss
 end
 
@@ -247,99 +217,78 @@ function generate_loss(
         strategy::GridTraining, phi, f, autodiff::Bool, tspan, p, batch, param_estim::Bool)
     ts = tspan[1]:(strategy.dx):tspan[2]
     autodiff && throw(ArgumentError("autodiff not supported for GridTraining."))
-    function loss(θ, _)
-        if batch
-            inner_loss(phi, f, autodiff, ts, θ, p, param_estim)
-        else
-            sum([inner_loss(phi, f, autodiff, t, θ, p, param_estim) for t in ts])
-        end
-    end
-    return loss
+    batch && return (θ, _) -> inner_loss(phi, f, autodiff, ts, θ, p, param_estim)
+    return (θ, _) -> sum([inner_loss(phi, f, autodiff, t, θ, p, param_estim) for t in ts])
 end
 
 function generate_loss(strategy::StochasticTraining, phi, f, autodiff::Bool, tspan, p,
         batch, param_estim::Bool)
     autodiff && throw(ArgumentError("autodiff not supported for StochasticTraining."))
-    function loss(θ, _)
-        ts = adapt(parameterless_type(θ),
-            [(tspan[2] - tspan[1]) * rand() + tspan[1] for i in 1:(strategy.points)])
+    return (θ, _) -> begin
+        T = promote_type(eltype(tspan[1]), eltype(tspan[2]))
+        ts = (tspan[2] - tspan[1]) .* rand(T, strategy.points) .+ tspan[1]
         if batch
             inner_loss(phi, f, autodiff, ts, θ, p, param_estim)
         else
             sum([inner_loss(phi, f, autodiff, t, θ, p, param_estim) for t in ts])
         end
     end
-    return loss
 end
 
 function generate_loss(
         strategy::WeightedIntervalTraining, phi, f, autodiff::Bool, tspan, p,
         batch, param_estim::Bool)
     autodiff && throw(ArgumentError("autodiff not supported for WeightedIntervalTraining."))
-    minT = tspan[1]
-    maxT = tspan[2]
-
+    minT, maxT = tspan
     weights = strategy.weights ./ sum(strategy.weights)
-
     N = length(weights)
-    points = strategy.points
-
     difference = (maxT - minT) / N
 
-    data = Float64[]
+    ts = eltype(difference)[]
     for (index, item) in enumerate(weights)
-        temp_data = rand(1, trunc(Int, points * item)) .* difference .+ minT .+
+        temp_data = rand(1, trunc(Int, strategy.points * item)) .* difference .+ minT .+
                     ((index - 1) * difference)
-        data = append!(data, temp_data)
+        append!(ts, temp_data)
     end
 
-    ts = data
-    function loss(θ, _)
-        if batch
-            inner_loss(phi, f, autodiff, ts, θ, p, param_estim)
-        else
-            sum([inner_loss(phi, f, autodiff, t, θ, p, param_estim) for t in ts])
-        end
-    end
-    return loss
+    batch && return (θ, _) -> inner_loss(phi, f, autodiff, ts, θ, p, param_estim)
+    return (θ, _) -> sum([inner_loss(phi, f, autodiff, t, θ, p, param_estim) for t in ts])
 end
 
 function evaluate_tstops_loss(phi, f, autodiff::Bool, tstops, p, batch, param_estim::Bool)
-    function loss(θ, _)
-        if batch
-            inner_loss(phi, f, autodiff, tstops, θ, p, param_estim)
-        else
-            sum([inner_loss(phi, f, autodiff, t, θ, p, param_estim) for t in tstops])
-        end
-    end
-    return loss
+    batch && return (θ, _) -> inner_loss(phi, f, autodiff, tstops, θ, p, param_estim)
+    return (θ, _) -> sum([inner_loss(phi, f, autodiff, t, θ, p, param_estim)
+                          for t in tstops])
 end
 
-function generate_loss(strategy::QuasiRandomTraining, phi, f, autodiff::Bool, tspan)
-    error("QuasiRandomTraining is not supported by NNODE since it's for high dimensional spaces only. Use StochasticTraining instead.")
+function generate_loss(::QuasiRandomTraining, phi, f, autodiff::Bool, tspan)
+    error("QuasiRandomTraining is not supported by NNODE since it's for high dimensional \
+           spaces only. Use StochasticTraining instead.")
 end
 
-struct NNODEInterpolation{T <: ODEPhi, T2}
-    phi::T
-    θ::T2
+@concrete struct NNODEInterpolation
+    phi <: ODEPhi
+    θ
 end
-(f::NNODEInterpolation)(t, idxs::Nothing, ::Type{Val{0}}, p, continuity) = f.phi(t, f.θ)
+
+(f::NNODEInterpolation)(t, ::Nothing, ::Type{Val{0}}, p, continuity) = f.phi(t, f.θ)
 (f::NNODEInterpolation)(t, idxs, ::Type{Val{0}}, p, continuity) = f.phi(t, f.θ)[idxs]
 
-function (f::NNODEInterpolation)(t::Vector, idxs::Nothing, ::Type{Val{0}}, p, continuity)
+function (f::NNODEInterpolation)(t::Vector, ::Nothing, ::Type{Val{0}}, p, continuity)
     out = f.phi(t, f.θ)
-    SciMLBase.RecursiveArrayTools.DiffEqArray([out[:, i] for i in axes(out, 2)], t)
+    return DiffEqArray([out[:, i] for i in axes(out, 2)], t)
 end
 
 function (f::NNODEInterpolation)(t::Vector, idxs, ::Type{Val{0}}, p, continuity)
     out = f.phi(t, f.θ)
-    SciMLBase.RecursiveArrayTools.DiffEqArray([out[idxs, i] for i in axes(out, 2)], t)
+    return DiffEqArray([out[idxs, i] for i in axes(out, 2)], t)
 end
 
 SciMLBase.interp_summary(::NNODEInterpolation) = "Trained neural network interpolation"
 SciMLBase.allowscomplex(::NNODE) = true
 
-function SciMLBase.__solve(prob::SciMLBase.AbstractODEProblem,
+function SciMLBase.__solve(
+        prob::SciMLBase.AbstractODEProblem,
         alg::NNODE,
         args...;
         dt = nothing,
@@ -351,76 +300,49 @@ function SciMLBase.__solve(prob::SciMLBase.AbstractODEProblem,
         verbose = false,
         saveat = nothing,
         maxiters = nothing,
-        tstops = nothing)
-    u0 = prob.u0
-    tspan = prob.tspan
-    f = prob.f
-    p = prob.p
+        tstops = nothing
+)
+    (; u0, tspan, f, p) = prob
     t0 = tspan[1]
-    param_estim = alg.param_estim
+    (; param_estim, chain, opt, autodiff, init_params, batch, additional_loss) = alg
 
-    #hidden layer
-    chain = alg.chain
-    opt = alg.opt
-    autodiff = alg.autodiff
-
-    #train points generation
-    init_params = alg.init_params
-
-    !(chain isa Lux.AbstractExplicitLayer) &&
-        error("Only Lux.AbstractExplicitLayer neural networks are supported")
     phi, init_params = generate_phi_θ(chain, t0, u0, init_params)
-    (recursive_eltype(init_params) <: Complex &&
-     alg.strategy isa QuadratureTraining) &&
+
+    (recursive_eltype(init_params) <: Complex && alg.strategy isa QuadratureTraining) &&
         error("QuadratureTraining cannot be used with complex parameters. Use other strategies.")
 
     init_params = if alg.param_estim
-        ComponentArrays.ComponentArray(;
-            depvar = ComponentArrays.ComponentArray(init_params), p = prob.p)
+        ComponentArray(; depvar = init_params, p)
     else
-        ComponentArrays.ComponentArray(;
-            depvar = ComponentArrays.ComponentArray(init_params))
+        ComponentArray(; depvar = init_params)
     end
 
-    isinplace(prob) &&
-        throw(error("The NNODE solver only supports out-of-place ODE definitions, i.e. du=f(u,p,t)."))
-
-    try
-        phi(t0, init_params)
-    catch err
-        if isa(err, DimensionMismatch)
-            throw(DimensionMismatch("Dimensions of the initial u0 and chain should match"))
-        else
-            throw(err)
-        end
-    end
+    @assert !isinplace(prob) "The NNODE solver only supports out-of-place ODE definitions, i.e. du=f(u,p,t)."
 
     strategy = if alg.strategy === nothing
         if dt !== nothing
             GridTraining(dt)
         else
             QuadratureTraining(; quadrature_alg = QuadGKJL(),
-                reltol = convert(eltype(u0), reltol),
-                abstol = convert(eltype(u0), abstol), maxiters = maxiters,
-                batch = 0)
+                reltol = convert(eltype(u0), reltol), abstol = convert(eltype(u0), abstol),
+                maxiters, batch = 0)
         end
     else
         alg.strategy
     end
 
-    batch = alg.batch
     inner_f = generate_loss(strategy, phi, f, autodiff, tspan, p, batch, param_estim)
-    additional_loss = alg.additional_loss
-    (param_estim && isnothing(additional_loss)) &&
+
+    (param_estim && additional_loss === nothing) &&
         throw(ArgumentError("Please provide `additional_loss` in `NNODE` for parameter estimation (`param_estim` is true)."))
 
     # Creates OptimizationFunction Object from total_loss
     function total_loss(θ, _)
         L2_loss = inner_f(θ, phi)
-        if !(additional_loss isa Nothing)
+        if additional_loss !== nothing
             L2_loss = L2_loss + additional_loss(phi, θ)
         end
-        if !(tstops isa Nothing)
+        if tstops !== nothing
             num_tstops_points = length(tstops)
             tstops_loss_func = evaluate_tstops_loss(
                 phi, f, autodiff, tstops, p, batch, param_estim)
@@ -440,20 +362,19 @@ function SciMLBase.__solve(prob::SciMLBase.AbstractODEProblem,
         return L2_loss
     end
 
-    # Choice of Optimization Algo for Training Strategies
-    opt_algo = if strategy isa QuadratureTraining
-        Optimization.AutoForwardDiff()
-    else
-        Optimization.AutoZygote()
-    end
-    # Creates OptimizationFunction Object from total_loss
+    opt_algo = ifelse(strategy isa QuadratureTraining, AutoForwardDiff(), AutoZygote())
     optf = OptimizationFunction(total_loss, opt_algo)
 
-    iteration = 0
+    plen = maxiters === nothing ? 6 : ndigits(maxiters)
     callback = function (p, l)
-        iteration += 1
-        verbose && println("Current loss is: $l, Iteration: $iteration")
-        l < abstol
+        if verbose
+            if maxiters === nothing
+                @printf("[NNODE]\tIter: [%*d]\tLoss: %g\n", plen, p.iter, l)
+            else
+                @printf("[NNODE]\tIter: [%*d/%d]\tLoss: %g\n", plen, p.iter, maxiters, l)
+            end
+        end
+        return l < abstol
     end
 
     optprob = OptimizationProblem(optf, init_params)
@@ -478,15 +399,13 @@ function SciMLBase.__solve(prob::SciMLBase.AbstractODEProblem,
         u = [phi(t, res.u) for t in ts]
     end
 
-    sol = SciMLBase.build_solution(prob, alg, ts, u;
-        k = res, dense = true,
-        interp = NNODEInterpolation(phi, res.u),
-        calculate_error = false,
-        retcode = ReturnCode.Success,
-        original = res,
-        resid = res.objective)
+    sol = SciMLBase.build_solution(prob, alg, ts, u; k = res, dense = true,
+        interp = NNODEInterpolation(phi, res.u), calculate_error = false,
+        retcode = ReturnCode.Success, original = res, resid = res.objective)
+
     SciMLBase.has_analytic(prob.f) &&
-        SciMLBase.calculate_solution_errors!(sol; timeseries_errors = true,
-            dense_errors = false)
-    sol
-end #solve
+        SciMLBase.calculate_solution_errors!(
+            sol; timeseries_errors = true, dense_errors = false)
+
+    return sol
+end
diff --git a/src/pinn_types.jl b/src/pinn_types.jl
index 59480d8a60..15b426f0f1 100644
--- a/src/pinn_types.jl
+++ b/src/pinn_types.jl
@@ -1,43 +1,45 @@
-"""
-???
-"""
 struct LogOptions
-    log_frequency::Int64
+    log_frequency::Int
     # TODO: add in an option for saving plots in the log. this is currently not done because the type of plot is dependent on the PDESystem
     #       possible solution: pass in a plot function?
     #       this is somewhat important because we want to support plotting adaptive weights that depend on pde independent variables
     #       and not just one weight for each loss function, i.e. pde_loss_weights(i, t, x) and since this would be function-internal,
     #       we'd want the plot & log to happen internally as well
     #       plots of the learned function can happen in the outer callback, but we might want to offer that here too
-
-    SciMLBase.@add_kwonly function LogOptions(; log_frequency = 50)
-        new(convert(Int64, log_frequency))
-    end
 end
 
-"""This function is defined here as stubs to be overridden by the subpackage NeuralPDELogging if imported"""
-function logvector(logger, v::AbstractVector{R}, name::AbstractString,
-        step::Integer) where {R <: Real}
-    nothing
+LogOptions(; log_frequency = 50) = LogOptions(log_frequency)
+
+logvector(logger, v::AbstractVector{<:Real}, name::AbstractString, step::Integer) = nothing
+logscalar(logger, s::Real, name::AbstractString, step::Integer) = nothing
+
+"""
+An encoding of the test function phi that is used for calculating the PDE
+value at domain points x
+
+Fields:
+
+- `f`: A representation of the chain function.
+- `st`: The state of the Lux.AbstractLuxLayer. It should be updated on each call.
+"""
+@concrete struct Phi
+    smodel <: StatefulLuxLayer
 end
 
-"""This function is defined here as stubs to be overridden by the subpackage NeuralPDELogging if imported"""
-function logscalar(logger, s::R, name::AbstractString, step::Integer) where {R <: Real}
-    nothing
+function Phi(layer::AbstractLuxLayer)
+    return Phi(StatefulLuxLayer{true}(
+        layer, nothing, initialstates(Random.default_rng(), layer)))
 end
 
+(f::Phi)(x::Number, θ) = only(cdev(f([x], θ)))
+
+(f::Phi)(x::AbstractArray, θ) = f.smodel(safe_get_device(θ)(x), θ)
+
 """
-    PhysicsInformedNN(chain,
-                    strategy;
-                    init_params = nothing,
-                    phi = nothing,
-                    param_estim = false,
-                    additional_loss = nothing,
-                    adaptive_loss = nothing,
-                    logger = nothing,
-                    log_options = LogOptions(),
-                    iteration = nothing,
-                    kwargs...)
+    PhysicsInformedNN(chain, strategy; init_params = nothing, phi = nothing,
+                      param_estim = false, additional_loss = nothing,
+                      adaptive_loss = nothing, logger = nothing, log_options = LogOptions(),
+                      iteration = nothing, kwargs...)
 
 A `discretize` algorithm for the ModelingToolkit PDESystem interface, which transforms a
 `PDESystem` into an `OptimizationProblem` using the Physics-Informed Neural Networks (PINN)
@@ -45,10 +47,11 @@ methodology.
 
 ## Positional Arguments
 
-* `chain`: a vector of Lux/Flux chains with a d-dimensional input and a
-           1-dimensional output corresponding to each of the dependent variables. Note that this
-           specification respects the order of the dependent variables as specified in the PDESystem.
-           Flux chains will be converted to Lux internally using `adapt(FromFluxAdaptor(false, false), chain)`.
+* `chain`: a vector of Lux/Flux chains with a d-dimensional input and a 1-dimensional output
+           corresponding to each of the dependent variables. Note that this specification
+           respects the order of the dependent variables as specified in the PDESystem.
+           Flux chains will be converted to Lux internally using
+           `adapt(FromFluxAdaptor(), chain)`.
 * `strategy`: determines which training strategy will be used. See the Training Strategy
               documentation for more details.
 
@@ -59,252 +62,108 @@ methodology.
   will convert to Float64.
 * `phi`: a trial solution, specified as `phi(x,p)` where `x` is the coordinates vector for
   the dependent variable and `p` are the weights of the phi function (generally the weights
-  of the neural network defining `phi`). By default, this is generated from the `chain`. This
-  should only be used to more directly impose functional information in the training problem,
-  for example imposing the boundary condition by the test function formulation.
+  of the neural network defining `phi`). By default, this is generated from the `chain`.
+  This should only be used to more directly impose functional information in the training
+  problem, for example imposing the boundary condition by the test function formulation.
 * `adaptive_loss`: the choice for the adaptive loss function. See the
   [adaptive loss page](@ref adaptive_loss) for more details. Defaults to no adaptivity.
 * `additional_loss`: a function `additional_loss(phi, θ, p_)` where `phi` are the neural
   network trial solutions, `θ` are the weights of the neural network(s), and `p_` are the
-  hyperparameters of the `OptimizationProblem`. If `param_estim = true`, then `θ` additionally
-  contains the parameters of the differential equation appended to the end of the vector.
+  hyperparameters of the `OptimizationProblem`. If `param_estim = true`, then `θ`
+  additionally contains the parameters of the differential equation appended to the end of
+  the vector.
 * `param_estim`: whether the parameters of the differential equation should be included in
   the values sent to the `additional_loss` function. Defaults to `false`.
 * `logger`: ?? needs docs
 * `log_options`: ?? why is this separate from the logger?
 * `iteration`: used to control the iteration counter???
-* `kwargs`: Extra keyword arguments which are splatted to the `OptimizationProblem` on `solve`.
+* `kwargs`: Extra keyword arguments which are splatted to the `OptimizationProblem` on
+  `solve`.
 """
-struct PhysicsInformedNN{T, P, PH, DER, PE, AL, ADA, LOG, K} <: AbstractPINN
-    chain::Any
-    strategy::T
-    init_params::P
-    phi::PH
-    derivative::DER
-    param_estim::PE
-    additional_loss::AL
-    adaptive_loss::ADA
-    logger::LOG
+@concrete struct PhysicsInformedNN <: AbstractPINN
+    chain <: Union{AbstractLuxLayer, AbstractArray{<:AbstractLuxLayer}}
+    strategy <: Union{Nothing, AbstractTrainingStrategy}
+    init_params
+    phi <: Union{Phi, AbstractArray{<:Phi}}
+    derivative
+    param_estim
+    additional_loss
+    adaptive_loss
+    logger
     log_options::LogOptions
-    iteration::Vector{Int64}
+    iteration
     self_increment::Bool
     multioutput::Bool
-    kwargs::K
-
-    @add_kwonly function PhysicsInformedNN(chain,
-            strategy;
-            init_params = nothing,
-            phi = nothing,
-            derivative = nothing,
-            param_estim = false,
-            additional_loss = nothing,
-            adaptive_loss = nothing,
-            logger = nothing,
-            log_options = LogOptions(),
-            iteration = nothing,
-            kwargs...)
-        multioutput = chain isa AbstractArray
-        if multioutput
-            !all(i -> i isa Lux.AbstractExplicitLayer, chain) &&
-                (chain = Lux.transform.(chain))
-        else
-            !(chain isa Lux.AbstractExplicitLayer) &&
-                (chain = adapt(FromFluxAdaptor(false, false), chain))
-        end
-        if phi === nothing
-            if multioutput
-                _phi = Phi.(chain)
-            else
-                _phi = Phi(chain)
-            end
-        else
-            if multioutput
-                all([phi.f[i] isa Lux.AbstractExplicitLayer for i in eachindex(phi.f)]) ||
-                    throw(ArgumentError("Only Lux Chains are supported"))
-            else
-                (phi.f isa Lux.AbstractExplicitLayer) ||
-                    throw(ArgumentError("Only Lux Chains are supported"))
-            end
-            _phi = phi
-        end
+    kwargs
+end
 
-        if derivative === nothing
-            _derivative = numeric_derivative
-        else
-            _derivative = derivative
+function PhysicsInformedNN(
+        chain, strategy; init_params = nothing, derivative = nothing, param_estim = false,
+        phi::Union{Nothing, Phi, AbstractArray{<:Phi}} = nothing, additional_loss = nothing,
+        adaptive_loss = nothing, logger = nothing, log_options = LogOptions(),
+        iteration = nothing, kwargs...)
+    multioutput = chain isa AbstractArray
+    if multioutput
+        chain = map(chain) do cᵢ
+            cᵢ isa AbstractLuxLayer && return cᵢ
+            return FromFluxAdaptor()(cᵢ)
         end
+    else
+        chain isa AbstractLuxLayer || (chain = FromFluxAdaptor()(chain))
+    end
 
-        if iteration isa Vector{Int64}
-            self_increment = false
-        else
-            iteration = [1]
-            self_increment = true
-        end
+    phi = phi === nothing ? (multioutput ? map(Phi, chain) : Phi(chain)) : phi
 
-        new{typeof(strategy), typeof(init_params), typeof(_phi), typeof(_derivative),
-            typeof(param_estim),
-            typeof(additional_loss), typeof(adaptive_loss), typeof(logger), typeof(kwargs)}(
-            chain,
-            strategy,
-            init_params,
-            _phi,
-            _derivative,
-            param_estim,
-            additional_loss,
-            adaptive_loss,
-            logger,
-            log_options,
-            iteration,
-            self_increment,
-            multioutput,
-            kwargs)
+    derivative = ifelse(derivative === nothing, numeric_derivative, derivative)
+
+    if iteration isa Vector{Int}
+        @assert length(iteration) == 1
+        iteration = Ref(iteration, 1)
+        self_increment = false
+    elseif iteration isa Ref
+        self_increment = false
+    else
+        iteration = Ref(1)
+        self_increment = true
     end
+
+    return PhysicsInformedNN(chain, strategy, init_params, phi, derivative, param_estim,
+        additional_loss, adaptive_loss, logger, log_options, iteration, self_increment,
+        multioutput, kwargs)
 end
 
 """
-    BayesianPINN(chain,
-                  strategy;
-                  init_params = nothing,
-                  phi = nothing,
-                  param_estim = false,
-                  additional_loss = nothing,
-                  adaptive_loss = nothing,
-                  logger = nothing,
-                  log_options = LogOptions(),
-                  iteration = nothing,
-                  dataset = nothing,
-                  kwargs...)
+    BayesianPINN(args...; dataset = nothing, kwargs...)
 
 A `discretize` algorithm for the ModelingToolkit PDESystem interface, which transforms a
-`PDESystem` into a likelihood function used for HMC based Posterior Sampling Algorithms [AdvancedHMC.jl](https://turinglang.org/AdvancedHMC.jl/stable/)
-which is later optimized upon to give the Solution Distribution of the PDE, using the Physics-Informed Neural Networks (PINN)
-methodology.
-
-## Positional Arguments
+`PDESystem` into a likelihood function used for HMC based Posterior Sampling Algorithms
+[AdvancedHMC.jl](https://turinglang.org/AdvancedHMC.jl/stable/) which is later optimized
+upon to give the Solution Distribution of the PDE, using the Physics-Informed Neural
+Networks (PINN) methodology.
 
-* `chain`: a vector of Lux.jl chains with a d-dimensional input and a
-  1-dimensional output corresponding to each of the dependent variables. Note that this
-  specification respects the order of the dependent variables as specified in the PDESystem.
-* `strategy`: determines which training strategy will be used. See the Training Strategy
-  documentation for more details.
+All positional arguments and keyword arguments are passed to `PhysicsInformedNN` except
+the ones mentioned below.
 
 ## Keyword Arguments
 
-* `Dataset`: A vector of matrix, each matrix for ith dependant
-  variable and first col in matrix is for dependant variables,
-  remaining columns for independent variables. Needed for inverse problem solving.
-* `init_params`: the initial parameters of the neural networks. If `init_params` is not
-  given, then the neural network default parameters are used. Note that for Lux, the default
-  will convert to Float64.
-* `phi`: a trial solution, specified as `phi(x,p)` where `x` is the coordinates vector for
-  the dependent variable and `p` are the weights of the phi function (generally the weights
-  of the neural network defining `phi`). By default, this is generated from the `chain`. This
-  should only be used to more directly impose functional information in the training problem,
-  for example imposing the boundary condition by the test function formulation.
-* `adaptive_loss`: (STILL WIP), the choice for the adaptive loss function. See the
-  [adaptive loss page](@ref adaptive_loss) for more details. Defaults to no adaptivity.
-* `additional_loss`: a function `additional_loss(phi, θ, p_)` where `phi` are the neural
-  network trial solutions, `θ` are the weights of the neural network(s), and `p_` are the
-  hyperparameters . If `param_estim = true`, then `θ` additionally
-  contains the parameters of the differential equation appended to the end of the vector.
-* `param_estim`: whether the parameters of the differential equation should be included in
-  the values sent to the `additional_loss` function. Defaults to `false`.
-* `logger`: ?? needs docs
-* `log_options`: ?? why is this separate from the logger?
-* `iteration`: used to control the iteration counter???
-* `kwargs`: Extra keyword arguments.
+* `dataset`: A vector of matrix, each matrix for ith dependant variable and first col in
+  matrix is for dependant variables, remaining columns for independent variables. Needed for
+  inverse problem solving.
 """
-struct BayesianPINN{T, P, PH, DER, PE, AL, ADA, LOG, D, K} <: AbstractPINN
-    chain::Any
-    strategy::T
-    init_params::P
-    phi::PH
-    derivative::DER
-    param_estim::PE
-    additional_loss::AL
-    adaptive_loss::ADA
-    logger::LOG
-    log_options::LogOptions
-    iteration::Vector{Int64}
-    self_increment::Bool
-    multioutput::Bool
-    dataset::D
-    kwargs::K
-
-    @add_kwonly function BayesianPINN(chain,
-            strategy;
-            init_params = nothing,
-            phi = nothing,
-            derivative = nothing,
-            param_estim = false,
-            additional_loss = nothing,
-            adaptive_loss = nothing,
-            logger = nothing,
-            log_options = LogOptions(),
-            iteration = nothing,
-            dataset = nothing,
-            kwargs...)
-        multioutput = chain isa AbstractArray
-        if multioutput
-            !all(i -> i isa Lux.AbstractExplicitLayer, chain) &&
-                (chain = Lux.transform.(chain))
-        else
-            !(chain isa Lux.AbstractExplicitLayer) &&
-                (chain = adapt(FromFluxAdaptor(false, false), chain))
-        end
-        if phi === nothing
-            if multioutput
-                _phi = Phi.(chain)
-            else
-                _phi = Phi(chain)
-            end
-        else
-            if multioutput
-                all([phi.f[i] isa Lux.AbstractExplicitLayer for i in eachindex(phi.f)]) ||
-                    throw(ArgumentError("Only Lux Chains are supported"))
-            else
-                (phi.f isa Lux.AbstractExplicitLayer) ||
-                    throw(ArgumentError("Only Lux Chains are supported"))
-            end
-            _phi = phi
-        end
-
-        if derivative === nothing
-            _derivative = numeric_derivative
-        else
-            _derivative = derivative
-        end
-
-        if iteration isa Vector{Int64}
-            self_increment = false
-        else
-            iteration = [1]
-            self_increment = true
-        end
+@concrete struct BayesianPINN <: AbstractPINN
+    pinn <: PhysicsInformedNN
+    dataset
+end
 
-        if dataset isa Nothing
-            dataset = (nothing, nothing)
-        end
+function Base.getproperty(pinn::BayesianPINN, name::Symbol)
+    name === :dataset && return getfield(pinn, :dataset)
+    name === :pinn && return getfield(pinn, :pinn)
+    return getproperty(pinn.pinn, name)
+end
 
-        new{typeof(strategy), typeof(init_params), typeof(_phi), typeof(_derivative),
-            typeof(param_estim),
-            typeof(additional_loss), typeof(adaptive_loss), typeof(logger), typeof(dataset),
-            typeof(kwargs)}(chain,
-            strategy,
-            init_params,
-            _phi,
-            _derivative,
-            param_estim,
-            additional_loss,
-            adaptive_loss,
-            logger,
-            log_options,
-            iteration,
-            self_increment,
-            multioutput,
-            dataset,
-            kwargs)
-    end
+function BayesianPINN(args...; dataset = nothing, kwargs...)
+    dataset === nothing && (dataset = (nothing, nothing))
+    return BayesianPINN(PhysicsInformedNN(args...; kwargs...), dataset)
 end
 
 """
@@ -385,7 +244,7 @@ mutable struct PINNRepresentation
     """
     The iteration counter used inside the cost function
     """
-    iteration::Vector{Int}
+    iteration::Any
     """
     The initial parameters as provided by the user. If the PDE is a system of PDEs, this
     will be an array of arrays. If Lux.jl is used, then this is an array of ComponentArrays.
@@ -486,49 +345,13 @@ struct PINNLossFunctions
     datafree_bc_loss_functions::Any
 end
 
-"""
-An encoding of the test function phi that is used for calculating the PDE
-value at domain points x
-
-Fields:
-
-- `f`: A representation of the chain function.
-- `st`: The state of the Lux.AbstractExplicitLayer. It should be updated on each call.
-"""
-mutable struct Phi{C, S}
-    f::C
-    st::S
-    function Phi(chain::Lux.AbstractExplicitLayer)
-        st = Lux.initialstates(Random.default_rng(), chain)
-        new{typeof(chain), typeof(st)}(chain, st)
-    end
-end
-
-function (f::Phi{<:Lux.AbstractExplicitLayer})(x::Number, θ)
-    y, st = f.f(adapt(parameterless_type(ComponentArrays.getdata(θ)), [x]), θ, f.st)
-    ChainRulesCore.@ignore_derivatives f.st = st
-    y
-end
-
-function (f::Phi{<:Lux.AbstractExplicitLayer})(x::AbstractArray, θ)
-    y, st = f.f(adapt(parameterless_type(ComponentArrays.getdata(θ)), x), θ, f.st)
-    ChainRulesCore.@ignore_derivatives f.st = st
-    y
-end
-
-function get_u()
-    u = (cord, θ, phi) -> phi(cord, θ)
-end
+get_u() = (cord, θ, phi) -> phi(cord, θ)
 
 # the method to calculate the derivative
 function numeric_derivative(phi, u, x, εs, order, θ)
-    _type = parameterless_type(ComponentArrays.getdata(θ))
-
     ε = εs[order]
     _epsilon = inv(first(ε[ε .!= zero(ε)]))
-
-    ε = adapt(_type, ε)
-    x = adapt(_type, x)
+    ε = ε |> safe_get_device(x)
 
     # any(x->x!=εs[1],εs)
     # εs is the epsilon for each order, if they are all the same then we use a fancy formula
diff --git a/src/rode_solve.jl b/src/rode_solve.jl
deleted file mode 100644
index 863a0d1be9..0000000000
--- a/src/rode_solve.jl
+++ /dev/null
@@ -1,116 +0,0 @@
-struct NNRODE{C, W, O, P, K} <: NeuralPDEAlgorithm
-    chain::C
-    W::W
-    opt::O
-    init_params::P
-    autodiff::Bool
-    kwargs::K
-end
-function NNRODE(chain, W, opt = Optim.BFGS(), init_params = nothing; autodiff = false,
-        kwargs...)
-    if init_params === nothing
-        if chain isa Flux.Chain
-            init_params, re = Flux.destructure(chain)
-        else
-            error("Only Flux is support here right now")
-        end
-    else
-        init_params = init_params
-    end
-    NNRODE(chain, W, opt, init_params, autodiff, kwargs)
-end
-
-function SciMLBase.solve(prob::SciMLBase.AbstractRODEProblem,
-        alg::NeuralPDEAlgorithm,
-        args...;
-        dt,
-        timeseries_errors = true,
-        save_everystep = true,
-        adaptive = false,
-        abstol = 1.0f-6,
-        verbose = false,
-        maxiters = 100)
-    SciMLBase.isinplace(prob) && error("Only out-of-place methods are allowed!")
-
-    u0 = prob.u0
-    tspan = prob.tspan
-    f = prob.f
-    p = prob.p
-    t0 = tspan[1]
-
-    #hidden layer
-    chain = alg.chain
-    opt = alg.opt
-    autodiff = alg.autodiff
-    Wg = alg.W
-    #train points generation
-    ts = tspan[1]:dt:tspan[2]
-    init_params = alg.init_params
-
-    if chain isa FastChain
-        #The phi trial solution
-        if u0 isa Number
-            phi = (t, W, θ) -> u0 +
-                               (t - tspan[1]) *
-                               first(chain(adapt(SciMLBase.parameterless_type(θ), [t, W]),
-                θ))
-        else
-            phi = (t, W, θ) -> u0 +
-                               (t - tspan[1]) *
-                               chain(adapt(SciMLBase.parameterless_type(θ), [t, W]), θ)
-        end
-    else
-        _, re = Flux.destructure(chain)
-        #The phi trial solution
-        if u0 isa Number
-            phi = (t, W, θ) -> u0 +
-                               (t - t0) *
-                               first(re(θ)(adapt(SciMLBase.parameterless_type(θ), [t, W])))
-        else
-            phi = (t, W, θ) -> u0 +
-                               (t - t0) *
-                               re(θ)(adapt(SciMLBase.parameterless_type(θ), [t, W]))
-        end
-    end
-
-    if autodiff
-        # dfdx = (t,W,θ) -> ForwardDiff.derivative(t->phi(t,θ),t)
-    else
-        dfdx = (t, W, θ) -> (phi(t + sqrt(eps(t)), W, θ) - phi(t, W, θ)) / sqrt(eps(t))
-    end
-
-    function inner_loss(t, W, θ)
-        sum(abs, dfdx(t, W, θ) - f(phi(t, W, θ), p, t, W))
-    end
-    Wprob = NoiseProblem(Wg, tspan)
-    Wsol = solve(Wprob; dt = dt)
-    W = NoiseGrid(ts, Wsol.W)
-    function loss(θ)
-        sum(abs2, inner_loss(ts[i], W.W[i], θ) for i in 1:length(ts)) # sum(abs2,phi(tspan[1],θ) - u0)
-    end
-
-    callback = function (p, l)
-        Wprob = NoiseProblem(Wg, tspan)
-        Wsol = solve(Wprob; dt = dt)
-        W = NoiseGrid(ts, Wsol.W)
-        verbose && println("Current loss is: $l")
-        l < abstol
-    end
-    #res = DiffEqFlux.sciml_train(loss, init_params, opt; cb = callback, maxiters = maxiters,
-    #                             alg.kwargs...)
-
-    #solutions at timepoints
-    noiseproblem = NoiseProblem(Wg, tspan)
-    W = solve(noiseproblem; dt = dt)
-    if u0 isa Number
-        u = [(phi(ts[i], W.W[i], res.minimizer)) for i in 1:length(ts)]
-    else
-        u = [(phi(ts[i], W.W[i], res.minimizer)) for i in 1:length(ts)]
-    end
-
-    sol = SciMLBase.build_solution(prob, alg, ts, u, W = W, calculate_error = false)
-    SciMLBase.has_analytic(prob.f) &&
-        SciMLBase.calculate_solution_errors!(sol; timeseries_errors = true,
-            dense_errors = false)
-    sol
-end #solve
diff --git a/src/symbolic_utilities.jl b/src/symbolic_utilities.jl
index c78ddeff83..9bd6e70cf6 100644
--- a/src/symbolic_utilities.jl
+++ b/src/symbolic_utilities.jl
@@ -115,11 +115,8 @@ where
 - θ - weights in neural network.
 """
 function _transform_expression(pinnrep::PINNRepresentation, ex; is_integral = false,
-        dict_transformation_vars = nothing,
-        transformation_vars = nothing)
-    @unpack indvars, depvars, dict_indvars, dict_depvars,
-    dict_depvar_input, multioutput, strategy, phi,
-    derivative, integral, flat_init_params, init_params = pinnrep
+        dict_transformation_vars = nothing, transformation_vars = nothing)
+    (; indvars, depvars, dict_indvars, dict_depvars, dict_depvar_input, multioutput, strategy, phi, derivative, integral, flat_init_params, init_params) = pinnrep
     eltypeθ = eltype(flat_init_params)
 
     _args = ex.args
@@ -141,10 +138,10 @@ function _transform_expression(pinnrep::PINNRepresentation, ex; is_integral = fa
                     ]
                 end
                 break
-            elseif e isa ModelingToolkit.Differential
+            elseif e isa Differential
                 derivative_variables = Symbol[]
                 order = 0
-                while (_args[1] isa ModelingToolkit.Differential)
+                while (_args[1] isa Differential)
                     order += 1
                     push!(derivative_variables, toexpr(_args[1].x))
                     _args = _args[2].args
@@ -230,7 +227,7 @@ function _transform_expression(pinnrep::PINNRepresentation, ex; is_integral = fa
                     if l isa Number
                         push!(lb_, l)
                     else
-                        l_expr = NeuralPDE.build_symbolic_loss_function(pinnrep, nothing;
+                        l_expr = build_symbolic_loss_function(pinnrep, nothing;
                             integrand = _dot_(l),
                             integrating_depvars = integrating_depvars,
                             param_estim = false,
@@ -243,7 +240,7 @@ function _transform_expression(pinnrep::PINNRepresentation, ex; is_integral = fa
                     if u_ isa Number
                         push!(ub_, u_)
                     else
-                        u_expr = NeuralPDE.build_symbolic_loss_function(pinnrep, nothing;
+                        u_expr = build_symbolic_loss_function(pinnrep, nothing;
                             integrand = _dot_(u_),
                             integrating_depvars = integrating_depvars,
                             param_estim = false,
@@ -344,18 +341,18 @@ function pair(eq, depvars, dict_depvars, dict_depvar_input)
 end
 
 function get_vars(indvars_, depvars_)
-    indvars = ModelingToolkit.getname.(indvars_)
+    indvars = SymbolicIndexingInterface.getname.(indvars_)
     depvars = Symbol[]
     dict_depvar_input = Dict{Symbol, Vector{Symbol}}()
     for d in depvars_
         if unwrap(d) isa SymbolicUtils.BasicSymbolic
-            dname = ModelingToolkit.getname(d)
+            dname = SymbolicIndexingInterface.getname(d)
             push!(depvars, dname)
             push!(dict_depvar_input,
                 dname => [nameof(unwrap(argument))
                           for argument in arguments(unwrap(d))])
         else
-            dname = ModelingToolkit.getname(d)
+            dname = SymbolicIndexingInterface.getname(d)
             push!(depvars, dname)
             push!(dict_depvar_input, dname => indvars) # default to all inputs if not given
         end
@@ -427,9 +424,8 @@ function get_argument end
 
 # Get arguments from boundary condition functions
 function get_argument(eqs, _indvars::Array, _depvars::Array)
-    depvars, indvars, dict_indvars, dict_depvars, dict_depvar_input = get_vars(_indvars,
-        _depvars)
-    get_argument(eqs, dict_indvars, dict_depvars)
+    _, _, dict_indvars, dict_depvars, _ = get_vars(_indvars, _depvars)
+    return get_argument(eqs, dict_indvars, dict_depvars)
 end
 function get_argument(eqs, dict_indvars, dict_depvars)
     exprs = toexpr.(eqs)
diff --git a/src/training_strategies.jl b/src/training_strategies.jl
index 858e93a237..974f2529fa 100644
--- a/src/training_strategies.jl
+++ b/src/training_strategies.jl
@@ -10,76 +10,64 @@ corresponding to the grid spacing in each dimension.
 
 * `dx`: the discretization of the grid.
 """
-struct GridTraining{T} <: AbstractTrainingStrategy
-    dx::T
+@concrete struct GridTraining <: AbstractTrainingStrategy
+    dx
 end
 
 # include dataset points in pde_residual loglikelihood (BayesianPINN)
 function merge_strategy_with_loglikelihood_function(pinnrep::PINNRepresentation,
-        strategy::GridTraining,
-        datafree_pde_loss_function,
+        strategy::GridTraining, datafree_pde_loss_function,
         datafree_bc_loss_function; train_sets_pde = nothing, train_sets_bc = nothing)
-    @unpack domains, eqs, bcs, dict_indvars, dict_depvars, flat_init_params = pinnrep
-
-    eltypeθ = eltype(pinnrep.flat_init_params)
-
-    # is vec as later each _set in pde_train_sets are columns as points transformed to vector of points (pde_train_sets must be rowwise)
-    pde_loss_functions = if !(train_sets_pde isa Nothing)
-        pde_train_sets = [train_set[:, 2:end] for train_set in train_sets_pde]
-        pde_train_sets = adapt.(
-            parameterless_type(ComponentArrays.getdata(flat_init_params)),
-            pde_train_sets)
-        [get_loss_function(_loss, _set, eltypeθ, strategy)
-         for (_loss, _set) in zip(datafree_pde_loss_function,
-            pde_train_sets)]
+    eltypeθ = recursive_eltype(pinnrep.flat_init_params)
+    adaptor = EltypeAdaptor{eltypeθ}()
+
+    # is vec as later each _set in pde_train_sets are columns as points transformed to
+    # vector of points (pde_train_sets must be rowwise)
+    pde_loss_functions = if train_sets_pde !== nothing
+        pde_train_sets = [train_set[:, 2:end] for train_set in train_sets_pde] |> adaptor
+        [get_loss_function(pinnrep, _loss, _set, eltypeθ, strategy)
+         for (_loss, _set) in zip(datafree_pde_loss_function, pde_train_sets)]
     else
         nothing
     end
 
-    bc_loss_functions = if !(train_sets_bc isa Nothing)
-        bcs_train_sets = [train_set[:, 2:end] for train_set in train_sets_bc]
-        bcs_train_sets = adapt.(
-            parameterless_type(ComponentArrays.getdata(flat_init_params)),
-            bcs_train_sets)
-        [get_loss_function(_loss, _set, eltypeθ, strategy)
+    bc_loss_functions = if train_sets_bc !== nothing
+        bcs_train_sets = [train_set[:, 2:end] for train_set in train_sets_bc] |> adaptor
+        [get_loss_function(pinnrep, _loss, _set, eltypeθ, strategy)
          for (_loss, _set) in zip(datafree_bc_loss_function, bcs_train_sets)]
     else
         nothing
     end
 
-    pde_loss_functions, bc_loss_functions
+    return pde_loss_functions, bc_loss_functions
 end
 
 function merge_strategy_with_loss_function(pinnrep::PINNRepresentation,
-        strategy::GridTraining,
-        datafree_pde_loss_function,
-        datafree_bc_loss_function)
-    @unpack domains, eqs, bcs, dict_indvars, dict_depvars, flat_init_params = pinnrep
-    dx = strategy.dx
-    eltypeθ = eltype(pinnrep.flat_init_params)
+        strategy::GridTraining, datafree_pde_loss_function, datafree_bc_loss_function)
+    (; domains, eqs, bcs, dict_indvars, dict_depvars) = pinnrep
+    eltypeθ = recursive_eltype(pinnrep.flat_init_params)
+    adaptor = EltypeAdaptor{eltypeθ}()
 
-    train_sets = generate_training_sets(domains, dx, eqs, bcs, eltypeθ,
+    train_sets = generate_training_sets(domains, strategy.dx, eqs, bcs, eltypeθ,
         dict_indvars, dict_depvars)
 
     # the points in the domain and on the boundary
-    pde_train_sets, bcs_train_sets = train_sets
-    pde_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)),
-        pde_train_sets)
-    bcs_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)),
-        bcs_train_sets)
-    pde_loss_functions = [get_loss_function(_loss, _set, eltypeθ, strategy)
-                          for (_loss, _set) in zip(datafree_pde_loss_function,
-        pde_train_sets)]
-
-    bc_loss_functions = [get_loss_function(_loss, _set, eltypeθ, strategy)
+    pde_train_sets, bcs_train_sets = train_sets |> adaptor
+    pde_loss_functions = [get_loss_function(pinnrep, _loss, _set, eltypeθ, strategy)
+                          for (_loss, _set) in zip(
+        datafree_pde_loss_function, pde_train_sets)]
+
+    bc_loss_functions = [get_loss_function(pinnrep, _loss, _set, eltypeθ, strategy)
                          for (_loss, _set) in zip(datafree_bc_loss_function, bcs_train_sets)]
 
-    pde_loss_functions, bc_loss_functions
+    return pde_loss_functions, bc_loss_functions
 end
 
-function get_loss_function(loss_function, train_set, eltypeθ, strategy::GridTraining;
-        τ = nothing)
-    loss = (θ) -> mean(abs2, loss_function(train_set, θ))
+function get_loss_function(
+        init_params, loss_function, train_set, eltype0, ::GridTraining; τ = nothing)
+    init_params = init_params isa PINNRepresentation ? init_params.init_params : init_params
+    train_set = train_set |> safe_get_device(init_params) |> EltypeAdaptor{eltype0}()
+    return θ -> mean(abs2, loss_function(train_set, θ))
 end
 
 """
@@ -95,49 +83,44 @@ end
   (by default, it equals `points`).
 """
 struct StochasticTraining <: AbstractTrainingStrategy
-    points::Int64
-    bcs_points::Int64
+    points::Int
+    bcs_points::Int
 end
 
-function StochasticTraining(points; bcs_points = points)
-    StochasticTraining(points, bcs_points)
-end
+StochasticTraining(points; bcs_points = points) = StochasticTraining(points, bcs_points)
 
 function generate_random_points(points, bound, eltypeθ)
     lb, ub = bound
-    rand(eltypeθ, length(lb), points) .* (ub .- lb) .+ lb
+    return rand(eltypeθ, length(lb), points) .* (ub .- lb) .+ lb
 end
 
 function merge_strategy_with_loss_function(pinnrep::PINNRepresentation,
-        strategy::StochasticTraining,
-        datafree_pde_loss_function,
-        datafree_bc_loss_function)
-    @unpack domains, eqs, bcs, dict_indvars, dict_depvars, flat_init_params = pinnrep
+        strategy::StochasticTraining, datafree_pde_loss_function, datafree_bc_loss_function)
+    (; domains, eqs, bcs, dict_indvars, dict_depvars) = pinnrep
 
     eltypeθ = eltype(pinnrep.flat_init_params)
 
-    bounds = get_bounds(domains, eqs, bcs, eltypeθ, dict_indvars, dict_depvars,
-        strategy)
+    bounds = get_bounds(domains, eqs, bcs, eltypeθ, dict_indvars, dict_depvars, strategy)
     pde_bounds, bcs_bounds = bounds
 
-    pde_loss_functions = [get_loss_function(_loss, bound, eltypeθ, strategy)
+    pde_loss_functions = [get_loss_function(pinnrep, _loss, bound, eltypeθ, strategy)
                           for (_loss, bound) in zip(datafree_pde_loss_function, pde_bounds)]
 
-    bc_loss_functions = [get_loss_function(_loss, bound, eltypeθ, strategy)
+    bc_loss_functions = [get_loss_function(pinnrep, _loss, bound, eltypeθ, strategy)
                          for (_loss, bound) in zip(datafree_bc_loss_function, bcs_bounds)]
 
     pde_loss_functions, bc_loss_functions
 end
 
-function get_loss_function(loss_function, bound, eltypeθ, strategy::StochasticTraining;
-        τ = nothing)
-    points = strategy.points
-    loss = (θ) -> begin
-        sets = generate_random_points(points, bound, eltypeθ)
-        sets_ = adapt(parameterless_type(ComponentArrays.getdata(θ)), sets)
-        mean(abs2, loss_function(sets_, θ))
+function get_loss_function(init_params, loss_function, bound, eltypeθ,
+        strategy::StochasticTraining; τ = nothing)
+    init_params = init_params isa PINNRepresentation ? init_params.init_params : init_params
+    dev = safe_get_device(init_params)
+    return θ -> begin
+        sets = generate_random_points(strategy.points, bound, eltypeθ) |> dev |>
+               EltypeAdaptor{recursive_eltype(θ)}()
+        return mean(abs2, loss_function(sets, θ))
     end
-    return loss
 end
 
 """
@@ -158,94 +141,80 @@ that accelerate the convergence in high dimensional spaces over pure random sequ
 * `bcs_points`: the number of quasi-random points in a sample for boundary conditions
   (by default, it equals `points`),
 * `sampling_alg`: the quasi-Monte Carlo sampling algorithm,
-* `resampling`: if it's false - the full training set is generated in advance before training,
-   and at each iteration, one subset is randomly selected out of the batch.
-   If it's true - the training set isn't generated beforehand, and one set of quasi-random
-   points is generated directly at each iteration in runtime. In this case, `minibatch` has no effect,
-* `minibatch`: the number of subsets, if resampling == false.
+* `resampling`: if it's false - the full training set is generated in advance before
+  training, and at each iteration, one subset is randomly selected out of the batch.
+  If it's true - the training set isn't generated beforehand, and one set of quasi-random
+  points is generated directly at each iteration in runtime. In this case, `minibatch` has
+  no effect.
+* `minibatch`: the number of subsets, if `!resampling`.
 
 For more information, see [QuasiMonteCarlo.jl](https://docs.sciml.ai/QuasiMonteCarlo/stable/).
 """
-struct QuasiRandomTraining <: AbstractTrainingStrategy
-    points::Int64
-    bcs_points::Int64
-    sampling_alg::QuasiMonteCarlo.SamplingAlgorithm
+@concrete struct QuasiRandomTraining <: AbstractTrainingStrategy
+    points::Int
+    bcs_points::Int
+    sampling_alg <: QuasiMonteCarlo.SamplingAlgorithm
     resampling::Bool
-    minibatch::Int64
+    minibatch::Int
 end
 
 function QuasiRandomTraining(points; bcs_points = points,
-        sampling_alg = LatinHypercubeSample(), resampling = true,
-        minibatch = 0)
-    QuasiRandomTraining(points, bcs_points, sampling_alg, resampling, minibatch)
+        sampling_alg = LatinHypercubeSample(), resampling = true, minibatch = 0)
+    return QuasiRandomTraining(points, bcs_points, sampling_alg, resampling, minibatch)
 end
 
 function generate_quasi_random_points_batch(points, bound, eltypeθ, sampling_alg,
         minibatch)
     lb, ub = bound
-    set = QuasiMonteCarlo.generate_design_matrices(points, lb, ub, sampling_alg, minibatch)
-    set = map(s -> adapt(parameterless_type(eltypeθ), s), set)
-    return set
+    return QuasiMonteCarlo.generate_design_matrices(
+        points, lb, ub, sampling_alg, minibatch) |> EltypeAdaptor{eltypeθ}()
 end
 
 function merge_strategy_with_loss_function(pinnrep::PINNRepresentation,
-        strategy::QuasiRandomTraining,
-        datafree_pde_loss_function,
+        strategy::QuasiRandomTraining, datafree_pde_loss_function,
         datafree_bc_loss_function)
-    @unpack domains, eqs, bcs, dict_indvars, dict_depvars, flat_init_params = pinnrep
+    (; domains, eqs, bcs, dict_indvars, dict_depvars) = pinnrep
 
     eltypeθ = eltype(pinnrep.flat_init_params)
 
-    bounds = get_bounds(domains, eqs, bcs, eltypeθ, dict_indvars, dict_depvars,
-        strategy)
+    bounds = get_bounds(domains, eqs, bcs, eltypeθ, dict_indvars, dict_depvars, strategy)
     pde_bounds, bcs_bounds = bounds
 
-    pde_loss_functions = [get_loss_function(_loss, bound, eltypeθ, strategy)
+    pde_loss_functions = [get_loss_function(pinnrep, _loss, bound, eltypeθ, strategy)
                           for (_loss, bound) in zip(datafree_pde_loss_function, pde_bounds)]
 
-    strategy_ = QuasiRandomTraining(strategy.bcs_points;
-        sampling_alg = strategy.sampling_alg,
-        resampling = strategy.resampling,
-        minibatch = strategy.minibatch)
-    bc_loss_functions = [get_loss_function(_loss, bound, eltypeθ, strategy_)
+    strategy_ = QuasiRandomTraining(strategy.bcs_points; strategy.sampling_alg,
+        strategy.resampling, strategy.minibatch)
+    bc_loss_functions = [get_loss_function(pinnrep, _loss, bound, eltypeθ, strategy_)
                          for (_loss, bound) in zip(datafree_bc_loss_function, bcs_bounds)]
 
-    pde_loss_functions, bc_loss_functions
+    return pde_loss_functions, bc_loss_functions
 end
 
-function get_loss_function(loss_function, bound, eltypeθ, strategy::QuasiRandomTraining;
-        τ = nothing)
-    sampling_alg = strategy.sampling_alg
-    points = strategy.points
-    resampling = strategy.resampling
-    minibatch = strategy.minibatch
+function get_loss_function(init_params, loss_function, bound, eltypeθ,
+        strategy::QuasiRandomTraining; τ = nothing)
+    (; sampling_alg, points, resampling, minibatch) = strategy
 
-    point_batch = nothing
-    point_batch = if resampling == false
-        generate_quasi_random_points_batch(points, bound, eltypeθ, sampling_alg, minibatch)
-    end
-    loss = if resampling == true
+    init_params = init_params isa PINNRepresentation ? init_params.init_params : init_params
+    dev = safe_get_device(init_params)
+
+    return if resampling
         θ -> begin
-            sets = ChainRulesCore.@ignore_derivatives QuasiMonteCarlo.sample(points,
-                bound[1],
-                bound[2],
-                sampling_alg)
-            sets_ = adapt(parameterless_type(ComponentArrays.getdata(θ)), sets)
-            mean(abs2, loss_function(sets_, θ))
+            sets = @ignore_derivatives QuasiMonteCarlo.sample(
+                points, bound[1], bound[2], sampling_alg)
+            sets = sets |> dev |> EltypeAdaptor{eltypeθ}()
+            return mean(abs2, loss_function(sets, θ))
         end
     else
-        θ -> begin
-            sets_ = point_batch[rand(1:minibatch)]
-            sets__ = adapt(parameterless_type(ComponentArrays.getdata(θ)), sets_)
-            mean(abs2, loss_function(sets__, θ))
-        end
+        point_batch = generate_quasi_random_points_batch(
+                          points, bound, eltypeθ, sampling_alg, minibatch) |> dev |>
+                      EltypeAdaptor{eltypeθ}()
+        θ -> mean(abs2, loss_function(point_batch[rand(1:minibatch)], θ))
     end
-    return loss
 end
 
 """
-    QuadratureTraining(; quadrature_alg = CubatureJLh(),
-                        reltol = 1e-6, abstol = 1e-3,
+    QuadratureTraining(; quadrature_alg = CubatureJLh(), reltol = 1e-6, abstol = 1e-3,
                         maxiters = 1_000, batch = 100)
 
 A training strategy which treats the loss function as the integral of
@@ -265,13 +234,12 @@ number of points to evaluate in a given integrand call.
 For more information on the argument values and algorithm choices, see
 [Integrals.jl](https://docs.sciml.ai/Integrals/stable/).
 """
-struct QuadratureTraining{Q <: SciMLBase.AbstractIntegralAlgorithm, T} <:
-       AbstractTrainingStrategy
-    quadrature_alg::Q
+@concrete struct QuadratureTraining{T} <: AbstractTrainingStrategy
+    quadrature_alg <: SciMLBase.AbstractIntegralAlgorithm
     reltol::T
     abstol::T
-    maxiters::Int64
-    batch::Int64
+    maxiters::Int
+    batch::Int
 end
 
 function QuadratureTraining(; quadrature_alg = CubatureJLh(), reltol = 1e-3, abstol = 1e-6,
@@ -280,48 +248,44 @@ function QuadratureTraining(; quadrature_alg = CubatureJLh(), reltol = 1e-3, abs
 end
 
 function merge_strategy_with_loss_function(pinnrep::PINNRepresentation,
-        strategy::QuadratureTraining,
-        datafree_pde_loss_function,
-        datafree_bc_loss_function)
-    @unpack domains, eqs, bcs, dict_indvars, dict_depvars, flat_init_params = pinnrep
+        strategy::QuadratureTraining, datafree_pde_loss_function, datafree_bc_loss_function)
+    (; domains, eqs, bcs, dict_indvars, dict_depvars) = pinnrep
     eltypeθ = eltype(pinnrep.flat_init_params)
 
-    bounds = get_bounds(domains, eqs, bcs, eltypeθ, dict_indvars, dict_depvars,
-        strategy)
+    bounds = get_bounds(domains, eqs, bcs, eltypeθ, dict_indvars, dict_depvars, strategy)
     pde_bounds, bcs_bounds = bounds
 
     lbs, ubs = pde_bounds
-    pde_loss_functions = [get_loss_function(_loss, lb, ub, eltypeθ, strategy)
+    pde_loss_functions = [get_loss_function(pinnrep, _loss, lb, ub, eltypeθ, strategy)
                           for (_loss, lb, ub) in zip(datafree_pde_loss_function, lbs, ubs)]
     lbs, ubs = bcs_bounds
-    bc_loss_functions = [get_loss_function(_loss, lb, ub, eltypeθ, strategy)
+    bc_loss_functions = [get_loss_function(pinnrep, _loss, lb, ub, eltypeθ, strategy)
                          for (_loss, lb, ub) in zip(datafree_bc_loss_function, lbs, ubs)]
 
-    pde_loss_functions, bc_loss_functions
+    return pde_loss_functions, bc_loss_functions
 end
 
-function get_loss_function(loss_function, lb, ub, eltypeθ, strategy::QuadratureTraining;
-        τ = nothing)
+function get_loss_function(init_params, loss_function, lb, ub, eltypeθ,
+        strategy::QuadratureTraining; τ = nothing)
+    init_params = init_params isa PINNRepresentation ? init_params.init_params : init_params
+    dev = safe_get_device(init_params)
+
     if length(lb) == 0
-        loss = (θ) -> mean(abs2, loss_function(rand(eltypeθ, 1, 10), θ))
-        return loss
+        return (θ) -> mean(abs2, loss_function(dev(rand(eltypeθ, 1, 10)), θ))
     end
+
     area = eltypeθ(prod(abs.(ub .- lb)))
     f_ = (lb, ub, loss_, θ) -> begin
         function integrand(x, θ)
-            x = adapt(parameterless_type(ComponentArrays.getdata(θ)), x)
-            sum(abs2, view(loss_(x, θ), 1, :), dims = 2) #./ size_x
+            x = x |> dev |> EltypeAdaptor{eltypeθ}()
+            return sum(abs2, view(loss_(x, θ), 1, :), dims = 2) #./ size_x
         end
         integral_function = BatchIntegralFunction(integrand, max_batch = strategy.batch)
         prob = IntegralProblem(integral_function, (lb, ub), θ)
-        solve(prob,
-            strategy.quadrature_alg,
-            reltol = strategy.reltol,
-            abstol = strategy.abstol,
-            maxiters = strategy.maxiters)[1]
+        return solve(prob, strategy.quadrature_alg; strategy.reltol, strategy.abstol,
+            strategy.maxiters)[1]
     end
-    loss = (θ) -> 1 / area * f_(lb, ub, loss_function, θ)
-    return loss
+    return (θ) -> f_(lb, ub, loss_function, θ) / area
 end
 
 """
@@ -334,25 +298,22 @@ such that the total number of sampled points is equivalent to the given samples
 
 ## Positional Arguments
 
-* `weights`: A vector of weights that should sum to 1, representing the proportion of samples at each interval.
+* `weights`: A vector of weights that should sum to 1, representing the proportion of
+  samples at each interval.
 * `points`: the total number of samples that we want, across the entire time span
 
 ## Limitations
 
 This training strategy can only be used with ODEs (`NNODE`).
 """
-struct WeightedIntervalTraining{T} <: AbstractTrainingStrategy
+@concrete struct WeightedIntervalTraining{T} <: AbstractTrainingStrategy
     weights::Vector{T}
     points::Int
 end
 
-function WeightedIntervalTraining(weights, points)
-    WeightedIntervalTraining(weights, points)
-end
-
-function get_loss_function(loss_function, train_set, eltypeθ,
-        strategy::WeightedIntervalTraining;
-        τ = nothing)
-    loss = (θ) -> mean(abs2, loss_function(train_set, θ))
-    return loss
+function get_loss_function(init_params, loss_function, train_set, eltype0,
+        ::WeightedIntervalTraining; τ = nothing)
+    init_params = init_params isa PINNRepresentation ? init_params.init_params : init_params
+    train_set = train_set |> safe_get_device(init_params) |> EltypeAdaptor{eltype0}()
+    return (θ) -> mean(abs2, loss_function(train_set, θ))
 end
diff --git a/src/transform_inf_integral.jl b/src/transform_inf_integral.jl
index 75bc605f1b..d0c0007c80 100644
--- a/src/transform_inf_integral.jl
+++ b/src/transform_inf_integral.jl
@@ -104,11 +104,7 @@ function transform_inf_integral(lb, ub, integrating_ex, integrating_depvars,
         end
 
         dict_transformation_vars, transformation_vars, integrating_var_transformation = transform_inf_expr(
-            integrating_depvars,
-            dict_depvar_input,
-            dict_depvars,
-            integrating_variable,
-            transform_indvars)
+            integrating_depvars, dict_depvar_input, dict_depvars, integrating_variable, transform_indvars)
 
         ϵ = 1 / 20 #cbrt(eps(eltypeθ))
 
diff --git a/test/BPINN_PDE_tests.jl b/test/BPINN_PDE_tests.jl
index 98cacb748c..cbb8ffa46c 100644
--- a/test/BPINN_PDE_tests.jl
+++ b/test/BPINN_PDE_tests.jl
@@ -1,10 +1,8 @@
-using Test, MCMCChains, Lux, ModelingToolkit
+using Test, MCMCChains, Lux, ModelingToolkit, ForwardDiff, Distributions, OrdinaryDiffEq,
+      AdvancedHMC, Statistics, Random, Functors, NeuralPDE, MonteCarloMeasurements,
+      ComponentArrays
 import ModelingToolkit: Interval, infimum, supremum
-using ForwardDiff, Distributions, OrdinaryDiffEq
-using AdvancedHMC, Statistics, Random, Functors
-using NeuralPDE, MonteCarloMeasurements
-using ComponentArrays
-using Flux
+import Flux
 
 Random.seed!(100)
 
@@ -16,20 +14,16 @@ Random.seed!(100)
     eqs = Dt(u(t)) - cos(2 * π * t) ~ 0
     bcs = [u(0) ~ 0.0]
     domains = [t ∈ Interval(0.0, 2.0)]
-    chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
+    chainl = Chain(Dense(1, 6, tanh), Dense(6, 1))
     initl, st = Lux.setup(Random.default_rng(), chainl)
     @named pde_system = PDESystem(eqs, bcs, domains, [t], [u(t)])
 
     # non adaptive case
     discretization = BayesianPINN([chainl], GridTraining([0.01]))
 
-    sol1 = ahmc_bayesian_pinn_pde(pde_system,
-        discretization;
-        draw_samples = 1500,
-        bcstd = [0.02],
-        phystd = [0.01],
-        priorsNNw = (0.0, 1.0),
-        saveats = [1 / 50.0])
+    sol1 = ahmc_bayesian_pinn_pde(
+        pde_system, discretization; draw_samples = 1500, bcstd = [0.02],
+        phystd = [0.01], priorsNNw = (0.0, 1.0), saveats = [1 / 50.0])
 
     analytic_sol_func(u0, t) = u0 + sin(2 * π * t) / (2 * π)
     ts = vec(sol1.timepoints[1])
@@ -55,19 +49,15 @@ end
     domains = [θ ∈ Interval(0.0, 1.0)]
 
     # Neural network
-    chain = Lux.Chain(Lux.Dense(1, 12, Lux.σ), Lux.Dense(12, 1))
+    chain = Chain(Dense(1, 12, σ), Dense(12, 1))
 
     discretization = BayesianPINN([chain], GridTraining([0.01]))
 
     @named pde_system = PDESystem(eq, bcs, domains, [θ], [u])
 
-    sol1 = ahmc_bayesian_pinn_pde(pde_system,
-        discretization;
-        draw_samples = 500,
-        bcstd = [0.1],
-        phystd = [0.05],
-        priorsNNw = (0.0, 10.0),
-        saveats = [1 / 100.0])
+    sol1 = ahmc_bayesian_pinn_pde(
+        pde_system, discretization; draw_samples = 500, bcstd = [0.1],
+        phystd = [0.05], priorsNNw = (0.0, 10.0), saveats = [1 / 100.0])
 
     analytic_sol_func(t) = exp(-(t^2) / 2) / (1 + t + t^3) + t^2
     ts = sol1.timepoints[1]
@@ -99,27 +89,21 @@ end
 
     # Neural network
     chain = [
-        Lux.Chain(Lux.Dense(1, 10, Lux.tanh), Lux.Dense(10, 10, Lux.tanh),
-            Lux.Dense(10, 1)), Lux.Chain(
-            Lux.Dense(1, 10, Lux.tanh), Lux.Dense(10, 10, Lux.tanh),
-            Lux.Dense(10, 1)),
-        Lux.Chain(Lux.Dense(1, 10, Lux.tanh), Lux.Dense(10, 10, Lux.tanh),
-            Lux.Dense(10, 1)),
-        Lux.Chain(Lux.Dense(1, 4, Lux.tanh), Lux.Dense(4, 1)),
-        Lux.Chain(Lux.Dense(1, 4, Lux.tanh), Lux.Dense(4, 1))]
+        Chain(Dense(1, 10, tanh), Dense(10, 10, tanh), Dense(10, 1)),
+        Chain(Dense(1, 10, tanh), Dense(10, 10, tanh), Dense(10, 1)),
+        Chain(Dense(1, 10, tanh), Dense(10, 10, tanh), Dense(10, 1)),
+        Chain(Dense(1, 4, tanh), Dense(4, 1)),
+        Chain(Dense(1, 4, tanh), Dense(4, 1))
+    ]
 
     discretization = BayesianPINN(chain, GridTraining(0.01))
 
     @named pde_system = PDESystem(eq, bcs, domains, [x],
         [u(x), Dxu(x), Dxxu(x), O1(x), O2(x)])
 
-    sol1 = ahmc_bayesian_pinn_pde(pde_system,
-        discretization;
-        draw_samples = 200,
-        bcstd = [0.01, 0.01, 0.01, 0.01, 0.01],
-        phystd = [0.005],
-        priorsNNw = (0.0, 10.0),
-        saveats = [1 / 100.0])
+    sol1 = ahmc_bayesian_pinn_pde(pde_system, discretization; draw_samples = 200,
+        bcstd = [0.01, 0.01, 0.01, 0.01, 0.01], phystd = [0.005],
+        priorsNNw = (0.0, 10.0), saveats = [1 / 100.0])
 
     analytic_sol_func(x) = (π * x * (-x + (π^2) * (2 * x - 3) + 1) - sin(π * x)) / (π^3)
 
@@ -148,7 +132,7 @@ end
 
     # Neural network
     dim = 2 # number of dimensions
-    chain = Lux.Chain(Lux.Dense(dim, 9, Lux.σ), Lux.Dense(9, 9, Lux.σ), Lux.Dense(9, 1))
+    chain = Chain(Dense(dim, 9, σ), Dense(9, 9, σ), Dense(9, 1))
 
     # Discretization
     dx = 0.04
@@ -156,13 +140,9 @@ end
 
     @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
 
-    sol1 = ahmc_bayesian_pinn_pde(pde_system,
-        discretization;
-        draw_samples = 200,
-        bcstd = [0.003, 0.003, 0.003, 0.003],
-        phystd = [0.003],
-        priorsNNw = (0.0, 10.0),
-        saveats = [1 / 100.0, 1 / 100.0])
+    sol1 = ahmc_bayesian_pinn_pde(pde_system, discretization; draw_samples = 200,
+        bcstd = [0.003, 0.003, 0.003, 0.003], phystd = [0.003],
+        priorsNNw = (0.0, 10.0), saveats = [1 / 100.0, 1 / 100.0])
 
     xs = sol1.timepoints[1]
     analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2)
@@ -191,17 +171,13 @@ end
     chain = Flux.Chain(Flux.Dense(1, 12, Flux.σ), Flux.Dense(12, 1))
 
     discretization = BayesianPINN([chain], GridTraining([0.01]))
-    @test discretization.chain[1] isa Lux.AbstractExplicitLayer
+    @test discretization.chain[1] isa AbstractLuxLayer
 
     @named pde_system = PDESystem(eq, bcs, domains, [θ], [u])
 
-    sol1 = ahmc_bayesian_pinn_pde(pde_system,
-        discretization;
-        draw_samples = 500,
-        bcstd = [0.1],
-        phystd = [0.05],
-        priorsNNw = (0.0, 10.0),
-        saveats = [1 / 100.0])
+    sol1 = ahmc_bayesian_pinn_pde(
+        pde_system, discretization; draw_samples = 500, bcstd = [0.1],
+        phystd = [0.05], priorsNNw = (0.0, 10.0), saveats = [1 / 100.0])
 
     analytic_sol_func(t) = exp(-(t^2) / 2) / (1 + t + t^3) + t^2
     ts = sol1.timepoints[1]
diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index c8fe60cb08..fd64e177da 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -1,9 +1,7 @@
-using Test, MCMCChains, Lux, ModelingToolkit
+using Test, MCMCChains, Lux, ModelingToolkit, ForwardDiff, Distributions, OrdinaryDiffEq,
+      AdvancedHMC, Statistics, Random, Functors, NeuralPDE, MonteCarloMeasurements,
+      ComponentArrays
 import ModelingToolkit: Interval, infimum, supremum
-using ForwardDiff, Distributions, OrdinaryDiffEq
-using AdvancedHMC, Statistics, Random, Functors
-using NeuralPDE, MonteCarloMeasurements
-using ComponentArrays
 
 Random.seed!(100)
 
@@ -59,7 +57,7 @@ Random.seed!(100)
         saveats = [1 / 50.0],
         param = [LogNormal(6.0, 0.5)])
 
-        # alternative to QuadratureTraining [WIP]
+    # alternative to QuadratureTraining [WIP]
 
     discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
         dataset = [dataset, nothing])
diff --git a/test/BPINN_Tests.jl b/test/BPINN_Tests.jl
index 6534e88409..c011e8fe9b 100644
--- a/test/BPINN_Tests.jl
+++ b/test/BPINN_Tests.jl
@@ -1,13 +1,11 @@
-# # Testing Code
-using Test, MCMCChains
-using ForwardDiff, Distributions, OrdinaryDiffEq
-using OptimizationOptimisers, AdvancedHMC, Lux
-using Statistics, Random, Functors, ComponentArrays
-using NeuralPDE, MonteCarloMeasurements
-using Flux
-
-# note that current testing bounds can be easily further tightened but have been inflated for support for Julia build v1
-# on latest Julia version it performs much better for below tests
+using Test, MCMCChains, ForwardDiff, Distributions, OrdinaryDiffEq, OptimizationOptimisers,
+      AdvancedHMC, Lux, Statistics, Random, Functors, ComponentArrays, NeuralPDE,
+      MonteCarloMeasurements
+import Flux
+
+# note that current testing bounds can be easily further tightened but have been inflated
+# for support for Julia build v1 on latest Julia version it performs much better for below
+# tests
 Random.seed!(100)
 
 @testset "Example 1 - without parameter estimation" begin
@@ -32,7 +30,7 @@ Random.seed!(100)
     time1 = vec(collect(Float64, ta0))
     physsol0_1 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
 
-    chainlux = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1))
+    chainlux = Chain(Dense(1, 7, tanh), Dense(7, 1))
     θinit, st = Lux.setup(Random.default_rng(), chainlux)
 
     fh_mcmc_chain, fhsamples, fhstats = ahmc_bayesian_pinn_ode(
@@ -53,7 +51,7 @@ Random.seed!(100)
     @test mean(abs.(x̂ .- meanscurve)) < 0.05
     @test mean(abs.(physsol1 .- meanscurve)) < 0.005
 
-    #--------------------- solve() call 
+    #--------------------- solve() call
     @test mean(abs.(x̂1 .- pmean(sol1lux.ensemblesol[1]))) < 0.025
     @test mean(abs.(physsol0_1 .- pmean(sol1lux.ensemblesol[1]))) < 0.025
 end
@@ -86,25 +84,15 @@ end
     time1 = vec(collect(Float64, ta0))
     physsol1_1 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
 
-    chainlux1 = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1))
+    chainlux1 = Chain(Dense(1, 7, tanh), Dense(7, 1))
     θinit, st = Lux.setup(Random.default_rng(), chainlux1)
 
-    fh_mcmc_chain, fhsamples, fhstats = ahmc_bayesian_pinn_ode(prob, chainlux1,
-        dataset = dataset,
-        draw_samples = 2500,
-        physdt = 1 / 50.0,
-        priorsNNw = (0.0, 3.0),
-        param = [LogNormal(9, 0.5)])
-
-    alg = BNNODE(chainlux1, dataset = dataset,
-        draw_samples = 2500,
-        physdt = 1 / 50.0,
-        priorsNNw = (0.0,
-            3.0),
-        param = [
-            LogNormal(9,
-            0.5)
-        ])
+    fh_mcmc_chain, fhsamples, fhstats = ahmc_bayesian_pinn_ode(
+        prob, chainlux1, dataset = dataset, draw_samples = 2500,
+        physdt = 1 / 50.0, priorsNNw = (0.0, 3.0), param = [LogNormal(9, 0.5)])
+
+    alg = BNNODE(chainlux1, dataset = dataset, draw_samples = 2500, physdt = 1 / 50.0,
+        priorsNNw = (0.0, 3.0), param = [LogNormal(9, 0.5)])
 
     sol2lux = solve(prob, alg)
 
@@ -117,13 +105,13 @@ end
     luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
     meanscurve = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
 
-    # --------------------- ahmc_bayesian_pinn_ode() call  
+    # --------------------- ahmc_bayesian_pinn_ode() call
     @test mean(abs.(physsol1 .- meanscurve)) < 0.15
 
     # ESTIMATED ODE PARAMETERS (NN1 AND NN2)
     @test abs(p - mean([fhsamples[i][23] for i in 2000:length(fhsamples)])) < abs(0.35 * p)
 
-    #-------------------------- solve() call  
+    #-------------------------- solve() call
     @test mean(abs.(physsol1_1 .- pmean(sol2lux.ensemblesol[1]))) < 8e-2
 
     # ESTIMATED ODE PARAMETERS (NN1 AND NN2)
@@ -145,45 +133,23 @@ end
     dataset = [x̂, time]
     physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
 
-    # seperate set of points for testing the solve() call (it uses saveat 1/50 hence here length 501)
+    # separate set of points for testing the solve() call (it uses saveat 1/50 hence here length 501)
     time1 = vec(collect(Float64, range(tspan[1], tspan[2], length = 501)))
     physsol2 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
 
-    chainlux12 = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh), Lux.Dense(6, 1))
+    chainlux12 = Chain(Dense(1, 6, tanh), Dense(6, 6, tanh), Dense(6, 1))
     θinit, st = Lux.setup(Random.default_rng(), chainlux12)
 
     fh_mcmc_chainlux12, fhsampleslux12, fhstatslux12 = ahmc_bayesian_pinn_ode(
-        prob, chainlux12,
-        draw_samples = 1500,
-        l2std = [0.03],
-        phystd = [0.03],
-        priorsNNw = (0.0,
-            10.0))
+        prob, chainlux12, draw_samples = 1500, l2std = [0.03],
+        phystd = [0.03], priorsNNw = (0.0, 10.0))
 
     fh_mcmc_chainlux22, fhsampleslux22, fhstatslux22 = ahmc_bayesian_pinn_ode(
-        prob, chainlux12,
-        dataset = dataset,
-        draw_samples = 1500,
-        l2std = [0.03],
-        phystd = [0.03],
-        priorsNNw = (0.0,
-            10.0),
-        param = [
-            Normal(-7,
-            4)
-        ])
-
-    alg = BNNODE(chainlux12,
-        dataset = dataset,
-        draw_samples = 1500,
-        l2std = [0.03],
-        phystd = [0.03],
-        priorsNNw = (0.0,
-            10.0),
-        param = [
-            Normal(-7,
-            4)
-        ])
+        prob, chainlux12, dataset = dataset, draw_samples = 1500, l2std = [0.03],
+        phystd = [0.03], priorsNNw = (0.0, 10.0), param = [Normal(-7, 4)])
+
+    alg = BNNODE(chainlux12, dataset = dataset, draw_samples = 1500, l2std = [0.03],
+        phystd = [0.03], priorsNNw = (0.0, 10.0), param = [Normal(-7, 4)])
 
     sol3lux_pestim = solve(prob, alg)
 
@@ -203,18 +169,18 @@ end
     luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
     meanscurve2_2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
 
-    @test mean(abs.(sol.u .- meanscurve2_1)) < 1e-1
-    @test mean(abs.(physsol1 .- meanscurve2_1)) < 1e-1
-    @test mean(abs.(sol.u .- meanscurve2_2)) < 5e-2
-    @test mean(abs.(physsol1 .- meanscurve2_2)) < 5e-2
+    @test mean(abs, sol.u .- meanscurve2_1) < 1e-1
+    @test mean(abs, physsol1 .- meanscurve2_1) < 1e-1
+    @test mean(abs, sol.u .- meanscurve2_2) < 5e-2
+    @test mean(abs, physsol1 .- meanscurve2_2) < 5e-2
 
     # estimated parameters(lux chain)
     param1 = mean(i[62] for i in fhsampleslux22[1000:length(fhsampleslux22)])
     @test abs(param1 - p) < abs(0.3 * p)
 
-    #-------------------------- solve() call 
+    #-------------------------- solve() call
     # (lux chain)
-    @test mean(abs.(physsol2 .- pmean(sol3lux_pestim.ensemblesol[1]))) < 0.15
+    @test mean(abs, physsol2 .- pmean(sol3lux_pestim.ensemblesol[1])) < 0.15
     # estimated parameters(lux chain)
     param1 = sol3lux_pestim.estimated_de_params[1]
     @test abs(param1 - p) < abs(0.45 * p)
@@ -245,7 +211,7 @@ end
     fh_mcmc_chain, fhsamples, fhstats = ahmc_bayesian_pinn_ode(
         prob, chainflux, draw_samples = 2500)
     alg = BNNODE(chainflux, draw_samples = 2500)
-    @test alg.chain isa Lux.AbstractExplicitLayer
+    @test alg.chain isa AbstractLuxLayer
 end
 
 @testset "Example 3 but with the new objective" begin
@@ -264,47 +230,25 @@ end
     dataset = [x̂, time]
     physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
 
-    # seperate set of points for testing the solve() call (it uses saveat 1/50 hence here length 501)
+    # separate set of points for testing the solve() call (it uses saveat 1/50 hence here length 501)
     time1 = vec(collect(Float64, range(tspan[1], tspan[2], length = 501)))
     physsol2 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
 
-    chainlux12 = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh), Lux.Dense(6, 1))
+    chainlux12 = Chain(Dense(1, 6, tanh), Dense(6, 6, tanh), Dense(6, 1))
     θinit, st = Lux.setup(Random.default_rng(), chainlux12)
 
     fh_mcmc_chainlux12, fhsampleslux12, fhstatslux12 = ahmc_bayesian_pinn_ode(
-        prob, chainlux12,
-        dataset = dataset,
-        draw_samples = 1000,
-        l2std = [0.1],
-        phystd = [0.03],
-        priorsNNw = (0.0,
-            1.0),
-        param = [
-            Normal(-7, 3)
-        ])
+        prob, chainlux12, dataset = dataset, draw_samples = 1000, l2std = [0.1],
+        phystd = [0.03], priorsNNw = (0.0, 1.0), param = [Normal(-7, 3)])
 
     fh_mcmc_chainlux22, fhsampleslux22, fhstatslux22 = ahmc_bayesian_pinn_ode(
-        prob, chainlux12,
-        dataset = dataset,
-        draw_samples = 1000,
-        l2std = [0.1],
-        phystd = [0.03],
-        priorsNNw = (0.0,
-            1.0),
-        param = [
-            Normal(-7, 3)
-        ], estim_collocate = true)
-
-    alg = BNNODE(chainlux12,
-        dataset = dataset,
-        draw_samples = 1000,
-        l2std = [0.1],
-        phystd = [0.03],
-        priorsNNw = (0.0,
-            1.0),
-        param = [
-            Normal(-7, 3)
-        ], estim_collocate = true)
+        prob, chainlux12, dataset = dataset, draw_samples = 1000,
+        l2std = [0.1], phystd = [0.03], priorsNNw = (0.0, 1.0),
+        param = [Normal(-7, 3)], estim_collocate = true)
+
+    alg = BNNODE(
+        chainlux12, dataset = dataset, draw_samples = 1000, l2std = [0.1], phystd = [0.03],
+        priorsNNw = (0.0, 1.0), param = [Normal(-7, 3)], estim_collocate = true)
 
     sol3lux_pestim = solve(prob, alg)
 
@@ -324,25 +268,25 @@ end
     luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
     meanscurve2_2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
 
-    @test mean(abs.(sol.u .- meanscurve2_2)) < 6e-2
-    @test mean(abs.(physsol1 .- meanscurve2_2)) < 6e-2
+    @test_broken mean(abs.(sol.u .- meanscurve2_2)) < 6e-2
+    @test_broken mean(abs.(physsol1 .- meanscurve2_2)) < 6e-2
     @test mean(abs.(sol.u .- meanscurve2_1)) > mean(abs.(sol.u .- meanscurve2_2))
     @test mean(abs.(physsol1 .- meanscurve2_1)) > mean(abs.(physsol1 .- meanscurve2_2))
 
     # estimated parameters(lux chain)
     param2 = mean(i[62] for i in fhsampleslux22[750:length(fhsampleslux22)])
-    @test abs(param2 - p) < abs(0.25 * p)
+    @test_broken abs(param2 - p) < abs(0.25 * p)
 
     param1 = mean(i[62] for i in fhsampleslux12[750:length(fhsampleslux12)])
-    @test abs(param1 - p) < abs(0.75 * p)
+    @test abs(param1 - p) < abs(0.8 * p)
     @test abs(param2 - p) < abs(param1 - p)
 
-    #-------------------------- solve() call 
+    #-------------------------- solve() call
     # (lux chain)
-    @test mean(abs.(physsol2 .- pmean(sol3lux_pestim.ensemblesol[1]))) < 0.1
+    @test_broken mean(abs.(physsol2 .- pmean(sol3lux_pestim.ensemblesol[1]))) < 0.1
     # estimated parameters(lux chain)
     param3 = sol3lux_pestim.estimated_de_params[1]
-    @test abs(param3 - p) < abs(0.2 * p)
+    @test_broken abs(param3 - p) < abs(0.2 * p)
 end
 
 @testset "Example 4 - improvement" begin
@@ -375,32 +319,16 @@ end
     y = u[2, :] + (0.8 .* randn(length(u[2, :])))
     dataset = [x, y, times]
 
-    chain = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),
-        Lux.Dense(6, 2))
-
-    alg1 = BNNODE(chain;
-        dataset = dataset,
-        draw_samples = 1000,
-        l2std = [0.2, 0.2],
-        phystd = [0.1, 0.1],
-        priorsNNw = (0.0, 1.0),
-        param = [
-            Normal(2, 0.5),
-            Normal(2, 0.5),
-            Normal(2, 0.5),
-            Normal(2, 0.5)])
-
-    alg2 = BNNODE(chain;
-        dataset = dataset,
-        draw_samples = 1000,
-        l2std = [0.2, 0.2],
-        phystd = [0.1, 0.1],
-        priorsNNw = (0.0, 1.0),
-        param = [
-            Normal(2, 0.5),
-            Normal(2, 0.5),
-            Normal(2, 0.5),
-            Normal(2, 0.5)], estim_collocate = true)
+    chain = Chain(Dense(1, 6, tanh), Dense(6, 6, tanh), Dense(6, 2))
+
+    alg1 = BNNODE(chain; dataset = dataset, draw_samples = 1000,
+        l2std = [0.2, 0.2], phystd = [0.1, 0.1], priorsNNw = (0.0, 1.0),
+        param = [Normal(2, 0.5), Normal(2, 0.5), Normal(2, 0.5), Normal(2, 0.5)])
+
+    alg2 = BNNODE(chain; dataset = dataset, draw_samples = 1000,
+        l2std = [0.2, 0.2], phystd = [0.1, 0.1], priorsNNw = (0.0, 1.0),
+        param = [Normal(2, 0.5), Normal(2, 0.5), Normal(2, 0.5), Normal(2, 0.5)],
+        estim_collocate = true)
 
     @time sol_pestim1 = solve(prob, alg1; saveat = dt)
     @time sol_pestim2 = solve(prob, alg2; saveat = dt)
@@ -408,5 +336,5 @@ end
     unsafe_comparisons(true)
     bitvec = abs.(p .- sol_pestim1.estimated_de_params) .>
              abs.(p .- sol_pestim2.estimated_de_params)
-    @test bitvec == ones(size(bitvec))
-end
\ No newline at end of file
+    @test_broken bitvec == ones(size(bitvec))
+end
diff --git a/test/IDE_tests.jl b/test/IDE_tests.jl
index eda5d7f380..f0cdfd5d52 100644
--- a/test/IDE_tests.jl
+++ b/test/IDE_tests.jl
@@ -1,10 +1,7 @@
-using Test, NeuralPDE
-using Optimization, OptimizationOptimJL
+using Test, NeuralPDE, Optimization, OptimizationOptimJL, DomainSets, Lux, Random,
+      Statistics
 import ModelingToolkit: Interval
-using DomainSets, Flux
-import Lux
 
-using Random
 Random.seed!(110)
 
 callback = function (p, l)
@@ -20,7 +17,7 @@ end
     eq = Di(i(t)) + 2 * i(t) + 5 * Ii(i(t)) ~ 1
     bcs = [i(0.0) ~ 0.0]
     domains = [t ∈ Interval(0.0, 2.0)]
-    chain = Lux.Chain(Lux.Dense(1, 15, Lux.σ), Lux.Dense(15, 1))
+    chain = Chain(Dense(1, 15, σ), Dense(15, 1))
     strategy_ = GridTraining(0.1)
     discretization = PhysicsInformedNN(chain, strategy_)
     @named pde_system = PDESystem(eq, bcs, domains, [t], [i(t)])
@@ -31,7 +28,7 @@ end
     analytic_sol_func(t) = 1 / 2 * (exp(-t)) * (sin(2 * t))
     u_real = [analytic_sol_func(t) for t in ts]
     u_predict = [first(phi([t], res.u)) for t in ts]
-    @test Flux.mse(u_real, u_predict) < 0.01
+    @test mean(abs2, u_real .- u_predict) < 0.01
 end
 
 @testset "Example 2 - 1D" begin
@@ -45,7 +42,7 @@ end
 
     bcs = [u(0.0) ~ 0.0]
     domains = [x ∈ Interval(0.0, 1.00)]
-    chain = Lux.Chain(Lux.Dense(1, 15, Lux.σ), Lux.Dense(15, 1))
+    chain = Chain(Dense(1, 15, σ), Dense(15, 1))
     strategy_ = GridTraining(0.1)
     discretization = PhysicsInformedNN(chain, strategy_)
     @named pde_system = PDESystem(eq, bcs, domains, [x], [u(x)])
@@ -56,7 +53,7 @@ end
     phi = discretization.phi
     u_predict = [first(phi([x], res.u)) for x in xs]
     u_real = [x^2 / cos(x) for x in xs]
-    @test Flux.mse(u_real, u_predict) < 0.001
+    @test mean(abs2, u_real .- u_predict) < 0.01
 end
 
 @testset "Example 3 - 2 Inputs, 1 Output" begin
@@ -68,7 +65,7 @@ end
     eq = Ix(u(x, y)) ~ 1 / 3
     bcs = [u(0.0, 0.0) ~ 1, Dx(u(x, y)) ~ -2 * x, Dy(u(x, y)) ~ -2 * y]
     domains = [x ∈ Interval(0.0, 1.00), y ∈ Interval(0.0, 1.00)]
-    chain = Lux.Chain(Lux.Dense(2, 15, Lux.σ), Lux.Dense(15, 1))
+    chain = Chain(Dense(2, 15, σ), Dense(15, 1))
     strategy_ = GridTraining(0.1)
     discretization = PhysicsInformedNN(chain, strategy_)
     @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
@@ -79,7 +76,7 @@ end
     phi = discretization.phi
     u_real = collect(1 - x^2 - y^2 for y in ys, x in xs)
     u_predict = collect(Array(phi([x, y], res.u))[1] for y in ys, x in xs)
-    @test Flux.mse(u_real, u_predict) < 0.001
+    @test mean(abs2, u_real .- u_predict) < 0.001
 end
 
 @testset "Example 4 - 2 Inputs, 1 Output" begin
@@ -91,7 +88,7 @@ end
     eq = Ix(u(x, y)) ~ 5 / 12
     bcs = [u(0.0, 0.0) ~ 0, Dy(u(x, y)) ~ 2 * y, u(x, 0) ~ x]
     domains = [x ∈ Interval(0.0, 1.00), y ∈ Interval(0.0, 1.00)]
-    chain = Lux.Chain(Lux.Dense(2, 15, Lux.σ), Lux.Dense(15, 1))
+    chain = Chain(Dense(2, 15, σ), Dense(15, 1))
     strategy_ = GridTraining(0.1)
     discretization = PhysicsInformedNN(chain, strategy_)
     @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
@@ -102,7 +99,7 @@ end
     phi = discretization.phi
     u_real = collect(x + y^2 for y in ys, x in xs)
     u_predict = collect(Array(phi([x, y], res.u))[1] for y in ys, x in xs)
-    @test Flux.mse(u_real, u_predict) < 0.01
+    @test mean(abs2, u_real .- u_predict) < 0.01
 end
 
 @testset "Example 5 - 1 Input, 2 Outputs" begin
@@ -113,7 +110,7 @@ end
     eqs = [Ix(u(x) * w(x)) ~ log(abs(x)), Dx(w(x)) ~ -2 / (x^3), u(x) ~ x]
     bcs = [u(1.0) ~ 1.0, w(1.0) ~ 1.0]
     domains = [x ∈ Interval(1.0, 2.0)]
-    chains = [Lux.Chain(Lux.Dense(1, 15, Lux.σ), Lux.Dense(15, 1)) for _ in 1:2]
+    chains = [Chain(Dense(1, 15, σ), Dense(15, 1)) for _ in 1:2]
     strategy_ = GridTraining(0.1)
     discretization = PhysicsInformedNN(chains, strategy_)
     @named pde_system = PDESystem(eqs, bcs, domains, [x], [u(x), w(x)])
@@ -125,8 +122,8 @@ end
     w_predict = [(phi[2]([x], res.u.depvar.w))[1] for x in xs]
     u_real = [x for x in xs]
     w_real = [1 / x^2 for x in xs]
-    @test Flux.mse(u_real, u_predict) < 0.001
-    @test Flux.mse(w_real, w_predict) < 0.001
+    @test mean(abs2, u_real .- u_predict) < 0.001
+    @test mean(abs2, w_real .- w_predict) < 0.001
 end
 
 @testset "Example 6: Infinity" begin
@@ -137,7 +134,7 @@ end
     eqs = [I(u(x)) ~ Iinf(u(x)) - 1 / x]
     bcs = [u(1) ~ 1]
     domains = [x ∈ Interval(1.0, 2.0)]
-    chain = Lux.Chain(Lux.Dense(1, 10, Lux.σ), Lux.Dense(10, 1))
+    chain = Chain(Dense(1, 10, σ), Dense(10, 1))
     discretization = PhysicsInformedNN(chain, NeuralPDE.GridTraining(0.1))
     @named pde_system = PDESystem(eqs, bcs, domains, [x], [u(x)])
     prob = discretize(pde_system, discretization)
@@ -146,7 +143,7 @@ end
     phi = discretization.phi
     u_predict = [first(phi([x], res.u)) for x in xs]
     u_real = [1 / x^2 for x in xs]
-    @test u_real≈u_predict rtol=10^-2
+    @test u_real≈u_predict rtol=10^-1
 end
 
 @testset "Example 7: Infinity" begin
@@ -156,7 +153,7 @@ end
     eq = I(u(x)) ~ 1 / x
     domains = [x ∈ Interval(1.0, 2.0)]
     bcs = [u(1) ~ 1]
-    chain = Lux.Chain(Lux.Dense(1, 12, Lux.tanh), Lux.Dense(12, 1))
+    chain = Chain(Dense(1, 12, tanh), Dense(12, 1))
     discretization = PhysicsInformedNN(chain, GridTraining(0.1))
     @named pde_system = PDESystem(eq, bcs, domains, [x], [u(x)])
     prob = discretize(pde_system, discretization)
diff --git a/test/NNDAE_tests.jl b/test/NNDAE_tests.jl
index bbcf12dd6d..cc36fd09e8 100644
--- a/test/NNDAE_tests.jl
+++ b/test/NNDAE_tests.jl
@@ -1,7 +1,5 @@
-using Test, Flux
-using Random, NeuralPDE
-using OrdinaryDiffEq, Statistics
-import Lux, OptimizationOptimisers, OptimizationOptimJL
+using Test, Random, NeuralPDE, OrdinaryDiffEq, Statistics, Lux, Optimisers,
+      OptimizationOptimJL, Optimisers
 
 Random.seed!(100)
 
@@ -22,15 +20,12 @@ Random.seed!(100)
     ground_sol = solve(prob_mm, Rodas5(), reltol = 1e-8, abstol = 1e-8)
 
     example = (du, u, p, t) -> [cos(2pi * t) - du[1], u[2] + cos(2pi * t) - du[2]]
-    differential_vars = [true, false]
-    prob = DAEProblem(example, du₀, u₀, tspan; differential_vars = differential_vars)
-    chain = Lux.Chain(Lux.Dense(1, 15, cos), Lux.Dense(15, 15, sin), Lux.Dense(15, 2))
-    opt = OptimizationOptimisers.Adam(0.1)
-    alg = NeuralPDE.NNDAE(chain, opt; autodiff = false)
+    prob = DAEProblem(example, du₀, u₀, tspan; differential_vars = [true, false])
+    chain = Chain(Dense(1, 15, cos), Dense(15, 15, sin), Dense(15, 2))
+    alg = NNDAE(chain, Optimisers.Adam(0.01); autodiff = false)
 
-    sol = solve(prob,
-        alg, verbose = false, dt = 1 / 100.0f0,
-        maxiters = 3000, abstol = 1.0f-10)
+    sol = solve(
+        prob, alg, verbose = false, dt = 1 / 100.0f0, maxiters = 3000, abstol = 1.0f-10)
     @test ground_sol(0:(1 / 100):1)≈sol atol=0.4
 end
 
@@ -52,13 +47,11 @@ end
     example = (du, u, p, t) -> [u[1] - t - du[1], u[2] - t - du[2]]
     differential_vars = [false, true]
     prob = DAEProblem(example, du₀, u₀, tspan; differential_vars = differential_vars)
-    chain = Lux.Chain(Lux.Dense(1, 15, Lux.σ), Lux.Dense(15, 2))
-    opt = OptimizationOptimisers.Adam(0.1)
-    alg = NNDAE(chain, OptimizationOptimisers.Adam(0.1); autodiff = false)
+    chain = Chain(Dense(1, 15, σ), Dense(15, 2))
+    alg = NNDAE(chain, Optimisers.Adam(0.1); autodiff = false)
 
     sol = solve(prob,
-        alg, verbose = false, dt = 1 / 100.0f0,
-        maxiters = 3000, abstol = 1.0f-10)
+        alg, verbose = false, dt = 1 / 100.0f0, maxiters = 3000, abstol = 1.0f-10)
 
     @test ground_sol(0:(1 / 100):(pi / 2))≈sol atol=0.4
 end
diff --git a/test/NNODE_tests.jl b/test/NNODE_tests.jl
index 0cd688e310..96fc17a194 100644
--- a/test/NNODE_tests.jl
+++ b/test/NNODE_tests.jl
@@ -1,29 +1,23 @@
-using Test
-using Random, NeuralPDE
-using OrdinaryDiffEq, Statistics
-import Lux, OptimizationOptimisers, OptimizationOptimJL
-using Flux
-using LineSearches
+using Test, Random, NeuralPDE, OrdinaryDiffEq, Statistics, Lux, OptimizationOptimisers,
+      OptimizationOptimJL, WeightInitializers, LineSearches
+import Flux
 
 rng = Random.default_rng()
 Random.seed!(100)
 
 @testset "Scalar" begin
-    # Run a solve on scalars
-    println("Scalar")
     linear = (u, p, t) -> cos(2pi * t)
     tspan = (0.0f0, 1.0f0)
     u0 = 0.0f0
     prob = ODEProblem(linear, u0, tspan)
-    luxchain = Lux.Chain(Lux.Dense(1, 5, Lux.σ), Lux.Dense(5, 1))
+    luxchain = Chain(Dense(1, 5, σ), Dense(5, 1))
     opt = OptimizationOptimisers.Adam(0.1, (0.9, 0.95))
 
     sol = solve(prob, NNODE(luxchain, opt), dt = 1 / 20.0f0, verbose = false,
         abstol = 1.0f-10, maxiters = 200)
 
     @test_throws ArgumentError solve(prob, NNODE(luxchain, opt; autodiff = true),
-        dt = 1 / 20.0f0,
-        verbose = false, abstol = 1.0f-10, maxiters = 200)
+        dt = 1 / 20.0f0, verbose = false, abstol = 1.0f-10, maxiters = 200)
 
     sol = solve(prob, NNODE(luxchain, opt), verbose = false,
         abstol = 1.0f-6, maxiters = 200)
@@ -37,21 +31,18 @@ Random.seed!(100)
 end
 
 @testset "Vector" begin
-    # Run a solve on vectors
-    println("Vector")
     linear = (u, p, t) -> [cos(2pi * t)]
     tspan = (0.0f0, 1.0f0)
     u0 = [0.0f0]
     prob = ODEProblem(linear, u0, tspan)
-    luxchain = Lux.Chain(Lux.Dense(1, 5, Lux.σ), Lux.Dense(5, 1))
+    luxchain = Chain(Dense(1, 5, σ), Dense(5, 1))
 
     opt = OptimizationOptimJL.BFGS()
     sol = solve(prob, NNODE(luxchain, opt), dt = 1 / 20.0f0, abstol = 1e-10,
         verbose = false, maxiters = 200)
 
     @test_throws ArgumentError solve(prob, NNODE(luxchain, opt; autodiff = true),
-        dt = 1 / 20.0f0,
-        abstol = 1e-10, verbose = false, maxiters = 200)
+        dt = 1 / 20.0f0, abstol = 1e-10, verbose = false, maxiters = 200)
 
     sol = solve(prob, NNODE(luxchain, opt), abstol = 1.0f-6,
         verbose = false, maxiters = 200)
@@ -62,27 +53,24 @@ end
 end
 
 @testset "Example 1" begin
-    println("Example 1")
     linear = (u, p, t) -> @. t^3 + 2 * t + (t^2) * ((1 + 3 * (t^2)) / (1 + t + (t^3))) -
                              u * (t + ((1 + 3 * (t^2)) / (1 + t + t^3)))
     linear_analytic = (u0, p, t) -> [exp(-(t^2) / 2) / (1 + t + t^3) + t^2]
     prob = ODEProblem(
         ODEFunction(linear, analytic = linear_analytic), [1.0f0], (0.0f0, 1.0f0))
-    luxchain = Lux.Chain(Lux.Dense(1, 128, Lux.σ), Lux.Dense(128, 1))
+    luxchain = Chain(Dense(1, 128, σ), Dense(128, 1))
     opt = OptimizationOptimisers.Adam(0.01)
 
     sol = solve(prob, NNODE(luxchain, opt), verbose = false, maxiters = 400)
     @test sol.errors[:l2] < 0.5
 
-    sol = solve(prob,
-        NNODE(luxchain, opt; batch = false,
-            strategy = StochasticTraining(100)),
+    sol = solve(
+        prob, NNODE(luxchain, opt; batch = false, strategy = StochasticTraining(100)),
         verbose = false, maxiters = 400)
     @test sol.errors[:l2] < 0.5
 
-    sol = solve(prob,
-        NNODE(luxchain, opt; batch = true,
-            strategy = StochasticTraining(100)),
+    sol = solve(
+        prob, NNODE(luxchain, opt; batch = true, strategy = StochasticTraining(100)),
         verbose = false, maxiters = 400)
     @test sol.errors[:l2] < 0.5
 
@@ -90,71 +78,44 @@ end
         maxiters = 400, dt = 1 / 5.0f0)
     @test sol.errors[:l2] < 0.5
 
-    sol = solve(prob, NNODE(luxchain, opt; batch = true), verbose = false,
-        maxiters = 400,
-        dt = 1 / 5.0f0)
+    sol = solve(prob, NNODE(luxchain, opt; batch = true),
+        verbose = false, maxiters = 400, dt = 1 / 5.0f0)
     @test sol.errors[:l2] < 0.5
 end
 
 @testset "Example 2" begin
-    println("Example 2")
     linear = (u, p, t) -> -u / 5 + exp(-t / 5) .* cos(t)
     linear_analytic = (u0, p, t) -> exp(-t / 5) * (u0 + sin(t))
     prob = ODEProblem(
         ODEFunction(linear, analytic = linear_analytic), 0.0f0, (0.0f0, 1.0f0))
-    luxchain = Lux.Chain(Lux.Dense(1, 5, Lux.σ), Lux.Dense(5, 1))
+    luxchain = Chain(Dense(1, 5, σ), Dense(5, 1))
 
-    opt = OptimizationOptimisers.Adam(0.1)
-    sol = solve(prob, NNODE(luxchain, opt), verbose = false, maxiters = 400,
-        abstol = 1.0f-8)
-    @test sol.errors[:l2] < 0.5
-
-    sol = solve(prob,
-        NNODE(luxchain, opt; batch = false,
-            strategy = StochasticTraining(100)),
-        verbose = false, maxiters = 400,
-        abstol = 1.0f-8)
-    @test sol.errors[:l2] < 0.5
-
-    sol = solve(prob,
-        NNODE(luxchain, opt; batch = true,
-            strategy = StochasticTraining(100)),
-        verbose = false, maxiters = 400,
-        abstol = 1.0f-8)
-    @test sol.errors[:l2] < 0.5
-
-    sol = solve(prob, NNODE(luxchain, opt; batch = false), verbose = false,
-        maxiters = 400,
-        abstol = 1.0f-8, dt = 1 / 5.0f0)
-    @test sol.errors[:l2] < 0.5
-
-    sol = solve(prob, NNODE(luxchain, opt; batch = true), verbose = false,
-        maxiters = 400,
-        abstol = 1.0f-8, dt = 1 / 5.0f0)
-    @test sol.errors[:l2] < 0.5
+    @testset for batch in (true, false), strategy in (StochasticTraining(100), nothing)
+        opt = OptimizationOptimisers.Adam(0.1)
+        sol = solve(prob, NNODE(luxchain, opt; batch, strategy),
+            verbose = false, maxiters = 400, abstol = 1.0f-8)
+        @test sol.errors[:l2] < 0.5
+    end
 end
 
 @testset "Example 3" begin
-    println("Example 3")
     linear = (u, p, t) -> [cos(2pi * t), sin(2pi * t)]
     tspan = (0.0f0, 1.0f0)
     u0 = [0.0f0, -1.0f0 / 2pi]
     linear_analytic = (u0, p, t) -> [sin(2pi * t) / 2pi, -cos(2pi * t) / 2pi]
     odefunction = ODEFunction(linear, analytic = linear_analytic)
     prob = ODEProblem(odefunction, u0, tspan)
-    luxchain = Lux.Chain(Lux.Dense(1, 10, Lux.σ), Lux.Dense(10, 2))
+    luxchain = Chain(Dense(1, 10, σ), Dense(10, 2))
     opt = OptimizationOptimisers.Adam(0.1)
     alg = NNODE(luxchain, opt; autodiff = false)
 
-    sol = solve(prob,
-        alg, verbose = false, dt = 1 / 40.0f0,
-        maxiters = 2000, abstol = 1.0f-7)
+    sol = solve(
+        prob, alg, verbose = false, dt = 1 / 40.0f0, maxiters = 2000, abstol = 1.0f-7)
     @test sol.errors[:l2] < 0.5
 end
 
 @testset "Training Strategies" begin
     @testset "WeightedIntervalTraining" begin
-        println("WeightedIntervalTraining")
         function f(u, p, t)
             [p[1] * u[1] - p[2] * u[1] * u[2], -p[3] * u[2] + p[4] * u[1] * u[2]]
         end
@@ -162,17 +123,21 @@ end
         u0 = [1.0, 1.0]
         prob_oop = ODEProblem{false}(f, u0, (0.0, 3.0), p)
         true_sol = solve(prob_oop, Tsit5(), saveat = 0.01)
-        func = Lux.σ
-        N = 12
-        chain = Lux.Chain(
-            Lux.Dense(1, N, func), Lux.Dense(N, N, func), Lux.Dense(N, N, func),
-            Lux.Dense(N, N, func), Lux.Dense(N, length(u0)))
-        opt = OptimizationOptimisers.Adam(0.01)
+
+        N = 64
+        chain = Chain(
+            Dense(1, N, gelu),
+            Dense(N, N, gelu),
+            Dense(N, N, gelu),
+            Dense(N, N, gelu),
+            Dense(N, length(u0))
+        )
+        opt = OptimizationOptimisers.Adam(0.001)
         weights = [0.7, 0.2, 0.1]
         points = 200
         alg = NNODE(chain, opt, autodiff = false,
-            strategy = NeuralPDE.WeightedIntervalTraining(weights, points))
-        sol = solve(prob_oop, alg, verbose = false, maxiters = 5000, saveat = 0.01)
+            strategy = WeightedIntervalTraining(weights, points))
+        sol = solve(prob_oop, alg; verbose = false, maxiters = 5000, saveat = 0.01)
         @test abs(mean(sol) - mean(true_sol)) < 0.2
     end
 
@@ -186,46 +151,40 @@ end
     u_analytical(x) = (1 / (2pi)) .* sin.(2pi .* x)
 
     @testset "GridTraining" begin
-        println("GridTraining")
-        luxchain = Lux.Chain(Lux.Dense(1, 5, Lux.σ), Lux.Dense(5, 1))
+        luxchain = Chain(Dense(1, 5, σ), Dense(5, 1))
         (u_, t_) = (u_analytical(ts), ts)
         function additional_loss(phi, θ)
             return sum(sum(abs2, [phi(t, θ) for t in t_] .- u_)) / length(u_)
         end
-        alg1 = NNODE(luxchain, opt, strategy = GridTraining(0.01),
-            additional_loss = additional_loss)
-        sol1 = solve(prob, alg1, verbose = false, abstol = 1e-8, maxiters = 500)
+        alg1 = NNODE(luxchain, opt; strategy = GridTraining(0.01), additional_loss)
+        sol1 = solve(prob, alg1; verbose = false, abstol = 1e-8, maxiters = 500)
         @test sol1.errors[:l2] < 0.5
     end
 
     @testset "QuadratureTraining" begin
-        println("QuadratureTraining")
-        luxchain = Lux.Chain(Lux.Dense(1, 5, Lux.σ), Lux.Dense(5, 1))
+        luxchain = Chain(Dense(1, 5, σ), Dense(5, 1))
         (u_, t_) = (u_analytical(ts), ts)
         function additional_loss(phi, θ)
             return sum(sum(abs2, [phi(t, θ) for t in t_] .- u_)) / length(u_)
         end
-        alg1 = NNODE(luxchain, opt, additional_loss = additional_loss)
-        sol1 = solve(prob, alg1, verbose = false, abstol = 1e-10, maxiters = 200)
+        alg1 = NNODE(luxchain, opt; additional_loss)
+        sol1 = solve(prob, alg1; verbose = false, abstol = 1e-10, maxiters = 200)
         @test sol1.errors[:l2] < 0.5
     end
 
     @testset "StochasticTraining" begin
-        println("StochasticTraining")
-        luxchain = Lux.Chain(Lux.Dense(1, 5, Lux.σ), Lux.Dense(5, 1))
+        luxchain = Chain(Dense(1, 5, σ), Dense(5, 1))
         (u_, t_) = (u_analytical(ts), ts)
         function additional_loss(phi, θ)
             return sum(sum(abs2, [phi(t, θ) for t in t_] .- u_)) / length(u_)
         end
-        alg1 = NNODE(luxchain, opt, strategy = StochasticTraining(1000),
-            additional_loss = additional_loss)
-        sol1 = solve(prob, alg1, verbose = false, abstol = 1e-8, maxiters = 500)
+        alg1 = NNODE(luxchain, opt; strategy = StochasticTraining(1000), additional_loss)
+        sol1 = solve(prob, alg1; verbose = false, abstol = 1e-8, maxiters = 500)
         @test sol1.errors[:l2] < 0.5
     end
 end
 
 @testset "Parameter Estimation" begin
-    println("Parameter Estimation")
     function lorenz(u, p, t)
         return [p[1] * (u[2] - u[1]),
             u[1] * (p[2] - u[3]) - u[2],
@@ -241,16 +200,16 @@ end
         return sum(abs2, phi(t_, θ) .- u_) / 100
     end
     n = 8
-    luxchain = Lux.Chain(
-        Lux.Dense(1, n, Lux.σ),
-        Lux.Dense(n, n, Lux.σ),
-        Lux.Dense(n, n, Lux.σ),
-        Lux.Dense(n, 3)
+    luxchain = Chain(
+        Dense(1, n, σ),
+        Dense(n, n, σ),
+        Dense(n, n, σ),
+        Dense(n, 3)
     )
     opt = OptimizationOptimJL.BFGS(linesearch = BackTracking())
-    alg = NNODE(luxchain, opt, strategy = GridTraining(0.01),
-        param_estim = true, additional_loss = additional_loss)
-    sol = solve(prob, alg, verbose = false, abstol = 1e-8, maxiters = 1000, saveat = t_)
+    alg = NNODE(luxchain, opt; strategy = GridTraining(0.01),
+        param_estim = true, additional_loss)
+    sol = solve(prob, alg; verbose = false, abstol = 1e-8, maxiters = 1000, saveat = t_)
     @test sol.k.u.p≈true_p atol=1e-2
     @test reduce(hcat, sol.u)≈u_ atol=1e-2
 end
@@ -274,11 +233,11 @@ end
 
     problem = ODEProblem(bloch_equations, u0, time_span, parameters)
 
-    chain = Lux.Chain(
-        Lux.Dense(1, 16, tanh;
-            init_weight = (rng, a...) -> Lux.kaiming_normal(rng, ComplexF64, a...)),
-        Lux.Dense(
-            16, 4; init_weight = (rng, a...) -> Lux.kaiming_normal(rng, ComplexF64, a...))
+    chain = Chain(
+        Dense(1, 16, tanh;
+            init_weight = (rng, a...) -> kaiming_normal(rng, ComplexF64, a...)),
+        Dense(
+            16, 4; init_weight = (rng, a...) -> kaiming_normal(rng, ComplexF64, a...))
     )
     ps, st = Lux.setup(rng, chain)
 
@@ -299,7 +258,6 @@ end
 end
 
 @testset "Translating from Flux" begin
-    println("Translating from Flux")
     linear = (u, p, t) -> cos(2pi * t)
     linear_analytic = (u, p, t) -> (1 / (2pi)) * sin(2pi * t)
     tspan = (0.0, 1.0)
@@ -310,7 +268,7 @@ end
     u_analytical(x) = (1 / (2pi)) .* sin.(2pi .* x)
     fluxchain = Flux.Chain(Flux.Dense(1, 5, Flux.σ), Flux.Dense(5, 1))
     alg1 = NNODE(fluxchain, opt)
-    @test alg1.chain isa Lux.AbstractExplicitLayer
+    @test alg1.chain isa AbstractLuxLayer
     sol1 = solve(prob, alg1, verbose = false, abstol = 1e-10, maxiters = 200)
     @test sol1.errors[:l2] < 0.5
 end
diff --git a/test/NNODE_tstops_test.jl b/test/NNODE_tstops_test.jl
index edcf0916a5..82f0278a5d 100644
--- a/test/NNODE_tstops_test.jl
+++ b/test/NNODE_tstops_test.jl
@@ -1,4 +1,4 @@
-using OrdinaryDiffEq, Lux, OptimizationOptimisers, Test, Statistics, NeuralPDE
+using OrdinaryDiffEq, Lux, OptimizationOptimisers, Optimisers, Test, Statistics, NeuralPDE
 
 function fu(u, p, t)
     [p[1] * u[1] - p[2] * u[1] * u[2], -p[3] * u[2] + p[4] * u[1] * u[2]]
@@ -13,78 +13,31 @@ points3 = [rand() + 2 for i in 1:40]
 addedPoints = vcat(points1, points2, points3)
 
 saveat = 0.01
-maxiters = 30000
 
 prob_oop = ODEProblem{false}(fu, u0, tspan, p)
-true_sol = solve(prob_oop, Tsit5(), saveat = saveat)
-func = Lux.σ
-N = 12
-chain = Lux.Chain(Lux.Dense(1, N, func), Lux.Dense(N, N, func), Lux.Dense(N, N, func),
-    Lux.Dense(N, N, func), Lux.Dense(N, length(u0)))
+true_sol = solve(prob_oop, Tsit5(); saveat)
+N = 16
+chain = Chain(
+    Dense(1, N, σ), Dense(N, N, σ), Dense(N, N, σ), Dense(N, N, σ), Dense(N, length(u0)))
 
-opt = OptimizationOptimisers.Adam(0.01)
+opt = Adam(0.01)
 threshold = 0.2
 
-#bad choices for weights, samples and dx so that the algorithm will fail without the added points
-weights = [0.3, 0.3, 0.4]
-points = 3
-dx = 1.0
+@testset "$(nameof(typeof(strategy)))" for strategy in [
+    GridTraining(1.0),
+    WeightedIntervalTraining([0.3, 0.3, 0.4], 3),
+    StochasticTraining(3)
+]
+    alg = NNODE(chain, opt; autodiff = false, strategy)
 
-@testset "GridTraining" begin
-    println("GridTraining")
     @testset "Without added points" begin
-        println("Without added points")
-        # (difference between solutions should be high)
-        alg = NNODE(chain, opt, autodiff = false, strategy = GridTraining(dx))
-        sol = solve(prob_oop, alg, verbose = false, maxiters = maxiters, saveat = saveat)
+        sol = solve(prob_oop, alg; verbose = false, maxiters = 1000, saveat)
         @test abs(mean(sol) - mean(true_sol)) > threshold
     end
-    @testset "With added points" begin
-        println("With added points")
-        # (difference between solutions should be low)
-        alg = NNODE(chain, opt, autodiff = false, strategy = GridTraining(dx))
-        sol = solve(prob_oop, alg, verbose = false, maxiters = maxiters,
-            saveat = saveat, tstops = addedPoints)
-        @test abs(mean(sol) - mean(true_sol)) < threshold
-    end
-end
 
-@testset "WeightedIntervalTraining" begin
-    println("WeightedIntervalTraining")
-    @testset "Without added points" begin
-        println("Without added points")
-        # (difference between solutions should be high)
-        alg = NNODE(chain, opt, autodiff = false,
-            strategy = WeightedIntervalTraining(weights, points))
-        sol = solve(prob_oop, alg, verbose = false, maxiters = maxiters, saveat = saveat)
-        @test abs(mean(sol) - mean(true_sol)) > threshold
-    end
-    @testset "With added points" begin
-        println("With added points")
-        # (difference between solutions should be low)
-        alg = NNODE(chain, opt, autodiff = false,
-            strategy = WeightedIntervalTraining(weights, points))
-        sol = solve(prob_oop, alg, verbose = false, maxiters = maxiters,
-            saveat = saveat, tstops = addedPoints)
-        @test abs(mean(sol) - mean(true_sol)) < threshold
-    end
-end
-
-@testset "StochasticTraining" begin
-    println("StochasticTraining")
-    @testset "Without added points" begin
-        println("Without added points")
-        # (difference between solutions should be high)
-        alg = NNODE(chain, opt, autodiff = false, strategy = StochasticTraining(points))
-        sol = solve(prob_oop, alg, verbose = false, maxiters = maxiters, saveat = saveat)
-        @test abs(mean(sol) - mean(true_sol)) > threshold
-    end
     @testset "With added points" begin
-        println("With added points")
-        # (difference between solutions should be low)
-        alg = NNODE(chain, opt, autodiff = false, strategy = StochasticTraining(points))
-        sol = solve(prob_oop, alg, verbose = false, maxiters = maxiters,
-            saveat = saveat, tstops = addedPoints)
+        sol = solve(
+            prob_oop, alg; verbose = false, maxiters = 10000, saveat, tstops = addedPoints)
         @test abs(mean(sol) - mean(true_sol)) < threshold
     end
 end
diff --git a/test/NNPDE_tests.jl b/test/NNPDE_tests.jl
index 7236ac041c..888179b561 100644
--- a/test/NNPDE_tests.jl
+++ b/test/NNPDE_tests.jl
@@ -1,14 +1,8 @@
-using NeuralPDE, Test
-using Optimization, OptimizationOptimJL, OptimizationOptimisers
-using Integrals, Cubature
-using QuasiMonteCarlo
+using NeuralPDE, Test, Optimization, OptimizationOptimJL, OptimizationOptimisers, Integrals,
+      Cubature, QuasiMonteCarlo, DomainSets, Lux, LineSearches, Random
 import ModelingToolkit: Interval, infimum, supremum
-using DomainSets
-import Lux
-using LineSearches
-using Flux
+import Flux
 
-using Random
 Random.seed!(100)
 
 callback = function (p, l)
@@ -33,7 +27,7 @@ function test_ode(strategy_)
     domains = [θ ∈ Interval(0.0, 1.0)]
 
     # Neural network
-    chain = Lux.Chain(Lux.Dense(1, 12, Lux.σ), Lux.Dense(12, 1))
+    chain = Chain(Dense(1, 12, σ), Dense(12, 1))
 
     discretization = PhysicsInformedNN(chain, strategy_)
     @named pde_system = PDESystem(eq, bcs, domains, [θ], [u])
@@ -54,18 +48,12 @@ end
 
 grid_strategy = GridTraining(0.1)
 quadrature_strategy = QuadratureTraining(quadrature_alg = CubatureJLh(),
-    reltol = 1e3, abstol = 1e-3,
-    maxiters = 50, batch = 100)
+    reltol = 1e3, abstol = 1e-3, maxiters = 50, batch = 100)
 stochastic_strategy = StochasticTraining(100; bcs_points = 50)
-quasirandom_strategy = QuasiRandomTraining(100;
-    sampling_alg = LatinHypercubeSample(),
-    resampling = false,
-    minibatch = 100)
-quasirandom_strategy_resampling = QuasiRandomTraining(100;
-    bcs_points = 50,
-    sampling_alg = LatticeRuleSample(),
-    resampling = true,
-    minibatch = 0)
+quasirandom_strategy = QuasiRandomTraining(100; sampling_alg = LatinHypercubeSample(),
+    resampling = false, minibatch = 100)
+quasirandom_strategy_resampling = QuasiRandomTraining(100; bcs_points = 50,
+    sampling_alg = LatticeRuleSample(), resampling = true, minibatch = 0)
 
 strategies = [
     grid_strategy,
@@ -76,8 +64,8 @@ strategies = [
 ]
 
 @testset "Test ODE/Heterogeneous" begin
-    map(strategies) do strategy_
-        test_ode(strategy_)
+    @testset "$(nameof(typeof(strategy)))" for strategy in strategies
+        test_ode(strategy)
     end
 end
 
@@ -96,31 +84,25 @@ end
 
     bcs = [u(0, 0, 0) ~ 0.0]
 
-    domains = [x ∈ Interval(0.0, 1.0),
-        y ∈ Interval(0.0, 1.0),
-        z ∈ Interval(0.0, 1.0)]
+    domains = [x ∈ Interval(0.0, 1.0), y ∈ Interval(0.0, 1.0), z ∈ Interval(0.0, 1.0)]
 
     chain = [
-        Lux.Chain(Lux.Dense(3, 12, Lux.tanh), Lux.Dense(12, 12, Lux.tanh),
-            Lux.Dense(12, 1)),
-        Lux.Chain(Lux.Dense(2, 12, Lux.tanh), Lux.Dense(12, 12, Lux.tanh),
-            Lux.Dense(12, 1)),
-        Lux.Chain(Lux.Dense(1, 12, Lux.tanh), Lux.Dense(12, 12, Lux.tanh),
-            Lux.Dense(12, 1)),
-        Lux.Chain(Lux.Dense(2, 12, Lux.tanh), Lux.Dense(12, 12, Lux.tanh),
-            Lux.Dense(12, 1))]
-
-    grid_strategy = NeuralPDE.GridTraining(0.1)
-    quadrature_strategy = NeuralPDE.QuadratureTraining(quadrature_alg = CubatureJLh(),
-        reltol = 1e-3, abstol = 1e-3,
-        maxiters = 50, batch = 100)
+        Chain(Dense(3, 12, tanh), Dense(12, 12, tanh), Dense(12, 1)),
+        Chain(Dense(2, 12, tanh), Dense(12, 12, tanh), Dense(12, 1)),
+        Chain(Dense(1, 12, tanh), Dense(12, 12, tanh), Dense(12, 1)),
+        Chain(Dense(2, 12, tanh), Dense(12, 12, tanh), Dense(12, 1))
+    ]
 
-    discretization = NeuralPDE.PhysicsInformedNN(chain, grid_strategy)
+    grid_strategy = GridTraining(0.1)
+    quadrature_strategy = QuadratureTraining(quadrature_alg = CubatureJLh(),
+        reltol = 1e-3, abstol = 1e-3, maxiters = 50, batch = 100)
+
+    discretization = PhysicsInformedNN(chain, grid_strategy)
 
     @named pde_system = PDESystem(eqs, bcs, domains, [x, y, z],
         [u(x, y, z), v(y, x), h(z), p(x, z)])
 
-    prob = NeuralPDE.discretize(pde_system, discretization)
+    prob = discretize(pde_system, discretization)
 
     callback = function (p, l)
         println("Current loss is: $l")
@@ -192,18 +174,17 @@ end
 
 @testset "Example 2, 2D Poisson equation" begin
     grid_strategy = GridTraining(0.1)
-    chain = Lux.Chain(Lux.Dense(2, 12, Lux.σ), Lux.Dense(12, 12, Lux.σ), Lux.Dense(12, 1))
+    chain = Chain(Dense(2, 12, σ), Dense(12, 12, σ), Dense(12, 1))
     test_2d_poisson_equation(chain, grid_strategy)
 
-    for strategy_ in strategies
-        chain_ = Lux.Chain(Lux.Dense(2, 12, Lux.σ), Lux.Dense(12, 12, Lux.σ),
-            Lux.Dense(12, 1))
-        test_2d_poisson_equation(chain_, strategy_)
+    @testset "$(nameof(typeof(strategy)))" for strategy in strategies
+        chain_ = Chain(Dense(2, 12, σ), Dense(12, 12, σ), Dense(12, 1))
+        test_2d_poisson_equation(chain_, strategy)
     end
-    algs = [CubatureJLp()] #CubatureJLh(),
-    for alg in algs
-        chain_ = Lux.Chain(Lux.Dense(2, 12, Lux.σ), Lux.Dense(12, 12, Lux.σ),
-            Lux.Dense(12, 1))
+
+    algs = [CubatureJLp()]
+    @testset "$(nameof(typeof(alg)))" for alg in algs
+        chain_ = Chain(Dense(2, 12, σ), Dense(12, 12, σ), Dense(12, 1))
         strategy_ = NeuralPDE.QuadratureTraining(quadrature_alg = alg, reltol = 1e-4,
             abstol = 1e-3, maxiters = 30, batch = 10)
         test_2d_poisson_equation(chain_, strategy_)
@@ -233,9 +214,8 @@ end
     domains = [x ∈ Interval(0.0, 1.0)]
 
     # Neural network
-    chain = [[Lux.Chain(Lux.Dense(1, 12, Lux.tanh), Lux.Dense(12, 12, Lux.tanh),
-                  Lux.Dense(12, 1)) for _ in 1:3]
-             [Lux.Chain(Lux.Dense(1, 4, Lux.tanh), Lux.Dense(4, 1)) for _ in 1:2]]
+    chain = [[Chain(Dense(1, 12, tanh), Dense(12, 12, tanh), Dense(12, 1)) for _ in 1:3]
+             [Chain(Dense(1, 4, tanh), Dense(4, 1)) for _ in 1:2]]
     quasirandom_strategy = QuasiRandomTraining(100; sampling_alg = LatinHypercubeSample())
 
     discretization = PhysicsInformedNN(chain, quasirandom_strategy)
@@ -286,8 +266,8 @@ end
     domains = [x ∈ Interval(0.0, 1.0), y ∈ Interval(0.0, 1.0)]
 
     # Neural network
-    chain1 = Lux.Chain(Lux.Dense(2, 15, Lux.tanh), Lux.Dense(15, 1))
-    chain2 = Lux.Chain(Lux.Dense(2, 15, Lux.tanh), Lux.Dense(15, 1))
+    chain1 = Chain(Dense(2, 15, tanh), Dense(15, 1))
+    chain2 = Chain(Dense(2, 15, tanh), Dense(15, 1))
 
     quadrature_strategy = QuadratureTraining(quadrature_alg = CubatureJLh(),
         reltol = 1e-3, abstol = 1e-3,
@@ -316,22 +296,24 @@ end
 end
 
 @testset "Example 5, 2d wave equation, neumann boundary condition" begin
-    #here we use low level api for build solution
+    # here we use low level api for build solution
     @parameters x, t
     @variables u(..)
     Dxx = Differential(x)^2
     Dtt = Differential(t)^2
     Dt = Differential(t)
 
-    #2D PDE
+    # 2D PDE
     C = 1
     eq = Dtt(u(x, t)) ~ C^2 * Dxx(u(x, t))
 
     # Initial and boundary conditions
-    bcs = [u(0, t) ~ 0.0,# for all t > 0
-        u(1, t) ~ 0.0,# for all t > 0
-        u(x, 0) ~ x * (1.0 - x), #for all 0 < x < 1
-        Dt(u(x, 0)) ~ 0.0] #for all  0 < x < 1]
+    bcs = [
+        u(0, t) ~ 0.0,           # for all t > 0
+        u(1, t) ~ 0.0,           # for all t > 0
+        u(x, 0) ~ x * (1.0 - x), # for all 0 < x < 1
+        Dt(u(x, 0)) ~ 0.0        # for all  0 < x < 1]
+    ]
 
     # Space and time domains
     domains = [x ∈ Interval(0.0, 1.0),
@@ -339,13 +321,12 @@ end
     @named pde_system = PDESystem(eq, bcs, domains, [x, t], [u(x, t)])
 
     # Neural network
-    chain = Lux.Chain(Lux.Dense(2, 16, Lux.σ), Lux.Dense(16, 16, Lux.σ), Lux.Dense(16, 1))
+    chain = Chain(Dense(2, 16, σ), Dense(16, 16, σ), Dense(16, 1))
     phi = NeuralPDE.Phi(chain)
     derivative = NeuralPDE.numeric_derivative
 
     quadrature_strategy = QuadratureTraining(quadrature_alg = CubatureJLh(),
-        reltol = 1e-3, abstol = 1e-3,
-        maxiters = 50, batch = 100)
+        reltol = 1e-3, abstol = 1e-3, maxiters = 50, batch = 100)
 
     discretization = PhysicsInformedNN(chain, quadrature_strategy)
     prob = discretize(pde_system, discretization)
@@ -390,10 +371,8 @@ end
     domains = [x ∈ Interval(0.0, 1.0), y ∈ Interval(0.0, 1.0)]
 
     quadrature_strategy = QuadratureTraining()
-    # Neural network
     inner = 20
-    chain = Lux.Chain(Lux.Dense(2, inner, Lux.tanh), Lux.Dense(inner, inner, Lux.tanh),
-        Lux.Dense(inner, 1))
+    chain = Chain(Dense(2, inner, tanh), Dense(inner, inner, tanh), Dense(inner, 1))
 
     discretization = PhysicsInformedNN(chain, quadrature_strategy)
     @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
@@ -426,7 +405,7 @@ end
 
     chain = Flux.Chain(Flux.Dense(1, 12, Flux.σ), Flux.Dense(12, 1))
     discretization = PhysicsInformedNN(chain, QuadratureTraining())
-    @test discretization.chain isa Lux.AbstractExplicitLayer
+    @test discretization.chain isa Lux.AbstractLuxLayer
 
     @named pde_system = PDESystem(eq, bcs, domains, [θ], [u])
     prob = discretize(pde_system, discretization)
diff --git a/test/NNPDE_tests_gpu_Lux.jl b/test/NNPDE_tests_gpu_Lux.jl
index 378c240165..90674b23ff 100644
--- a/test/NNPDE_tests_gpu_Lux.jl
+++ b/test/NNPDE_tests_gpu_Lux.jl
@@ -1,17 +1,14 @@
-using Lux, ComponentArrays, OptimizationOptimisers
-using Test, NeuralPDE
-using Optimization
-using LuxCUDA, QuasiMonteCarlo
+using Lux, ComponentArrays, OptimizationOptimisers, Test, NeuralPDE, Optimization, LuxCUDA,
+      QuasiMonteCarlo, Random
 import ModelingToolkit: Interval, infimum, supremum
 
-using Random
 Random.seed!(100)
 
 callback = function (p, l)
     println("Current loss is: $l")
     return false
 end
-CUDA.allowscalar(false)
+
 const gpud = gpu_device()
 
 @testset "ODE" begin
@@ -32,22 +29,16 @@ const gpud = gpu_device()
     dt = 0.1f0
     # Neural network
     inner = 20
-    chain = Lux.Chain(Lux.Dense(1, inner, Lux.σ),
-        Lux.Dense(inner, inner, Lux.σ),
-        Lux.Dense(inner, inner, Lux.σ),
-        Lux.Dense(inner, inner, Lux.σ),
-        Lux.Dense(inner, inner, Lux.σ),
-        Lux.Dense(inner, 1))
+    chain = Chain(Dense(1, inner, σ), Dense(inner, inner, σ), Dense(inner, inner, σ),
+        Dense(inner, inner, σ), Dense(inner, inner, σ), Dense(inner, 1))
 
     strategy = GridTraining(dt)
     ps = Lux.setup(Random.default_rng(), chain)[1] |> ComponentArray |> gpud
-    discretization = PhysicsInformedNN(chain,
-        strategy;
-        init_params = ps)
+    discretization = PhysicsInformedNN(chain, strategy; init_params = ps)
 
     @named pde_system = PDESystem(eq, bcs, domains, [θ], [u(θ)])
     prob = discretize(pde_system, discretization)
-    res = Optimization.solve(prob, OptimizationOptimisers.Adam(1e-2); maxiters = 2000)
+    res = solve(prob, OptimizationOptimisers.Adam(1e-2); maxiters = 2000)
     phi = discretization.phi
     analytic_sol_func(t) = exp(-(t^2) / 2) / (1 + t + t^3) + t^2
     ts = [infimum(d.domain):(dt / 10):supremum(d.domain) for d in domains][1]
@@ -73,13 +64,9 @@ end
     @named pdesys = PDESystem(eq, bcs, domains, [t, x], [u(t, x)])
 
     inner = 30
-    chain = Lux.Chain(Lux.Dense(2, inner, Lux.σ),
-        Lux.Dense(inner, inner, Lux.σ),
-        Lux.Dense(inner, inner, Lux.σ),
-        Lux.Dense(inner, inner, Lux.σ),
-        Lux.Dense(inner, inner, Lux.σ),
-        Lux.Dense(inner, inner, Lux.σ),
-        Lux.Dense(inner, 1))
+    chain = Chain(Dense(2, inner, σ), Dense(inner, inner, σ),
+        Dense(inner, inner, σ), Dense(inner, inner, σ),
+        Dense(inner, inner, σ), Dense(inner, inner, σ), Dense(inner, 1))
 
     strategy = StochasticTraining(500)
     ps = Lux.setup(Random.default_rng(), chain)[1] |> ComponentArray |> gpud .|> Float64
@@ -119,11 +106,8 @@ end
     @named pdesys = PDESystem(eq, bcs, domains, [t, x], [u(t, x)])
 
     inner = 20
-    chain = Lux.Chain(Lux.Dense(2, inner, Lux.σ),
-        Lux.Dense(inner, inner, Lux.σ),
-        Lux.Dense(inner, inner, Lux.σ),
-        Lux.Dense(inner, inner, Lux.σ),
-        Lux.Dense(inner, 1))
+    chain = Chain(Dense(2, inner, σ), Dense(inner, inner, σ),
+        Dense(inner, inner, σ), Dense(inner, inner, σ), Dense(inner, 1))
 
     strategy = QuasiRandomTraining(
         500; sampling_alg = SobolSample(), resampling = false, minibatch = 30)
@@ -173,11 +157,8 @@ end
 
     # Neural network
     inner = 25
-    chain = Lux.Chain(Lux.Dense(3, inner, Lux.σ),
-        Lux.Dense(inner, inner, Lux.σ),
-        Lux.Dense(inner, inner, Lux.σ),
-        Lux.Dense(inner, inner, Lux.σ),
-        Lux.Dense(inner, 1))
+    chain = Chain(Dense(3, inner, σ), Dense(inner, inner, σ),
+        Dense(inner, inner, σ), Dense(inner, inner, σ), Dense(inner, 1))
 
     strategy = GridTraining(0.05)
     ps = Lux.setup(Random.default_rng(), chain)[1] |> ComponentArray |> gpud .|> Float64
diff --git a/test/NNRODE_tests.jl b/test/NNRODE_tests.jl
deleted file mode 100644
index 59b890b4f2..0000000000
--- a/test/NNRODE_tests.jl
+++ /dev/null
@@ -1,40 +0,0 @@
-using Flux, OptimizationOptimisers, StochasticDiffEq, DiffEqNoiseProcess, Optim, Test
-using NeuralPDE
-
-using Random
-Random.seed!(100)
-
-println("Test Case 1")
-linear = (u, p, t, W) -> 2u * sin(W)
-tspan = (0.00f0, 1.00f0)
-u0 = 1.0f0
-dt = 1 / 50.0f0
-W = WienerProcess(0.0, 0.0, nothing)
-prob = RODEProblem(linear, u0, tspan, noise = W)
-chain = Flux.Chain(Dense(2, 8, relu), Dense(8, 16, relu), Dense(16, 1))
-opt = OptimizationOptimisers.Adam(1e-4)
-sol = solve(prob, NeuralPDE.NNRODE(chain, W, opt), dt = dt, verbose = true,
-    abstol = 1e-10, maxiters = 3000)
-W2 = NoiseWrapper(sol.W)
-prob1 = RODEProblem(linear, u0, tspan, noise = W2)
-sol2 = solve(prob1, RandomEM(), dt = dt)
-err = Flux.mse(sol.u, sol2.u)
-@test err < 0.3
-
-println("Test Case 2")
-linear = (u, p, t, W) -> t^3 + 2 * t + (t^2) * ((1 + 3 * (t^2)) / (1 + t + (t^3))) -
-                         u * (t + ((1 + 3 * (t^2)) / (1 + t + t^3))) + 5 * W
-tspan = (0.00f0, 1.00f0)
-u0 = 1.0f0
-dt = 1 / 100.0f0
-W = WienerProcess(0.0, 0.0, nothing)
-prob = RODEProblem(linear, u0, tspan, noise = W)
-chain = Flux.Chain(Dense(2, 32, sigmoid), Dense(32, 32, sigmoid), Dense(32, 1))
-opt = OptimizationOptimisers.Adam(1e-3)
-sol = solve(prob, NeuralPDE.NNRODE(chain, W, opt), dt = dt, verbose = true,
-    abstol = 1e-10, maxiters = 2000)
-W2 = NoiseWrapper(sol.W)
-prob1 = RODEProblem(linear, u0, tspan, noise = W2)
-sol2 = solve(prob1, RandomEM(), dt = dt)
-err = Flux.mse(sol.u, sol2.u)
-@test err < 0.4
diff --git a/test/adaptive_loss_tests.jl b/test/adaptive_loss_tests.jl
index 5259a019f1..6e9a6c059a 100644
--- a/test/adaptive_loss_tests.jl
+++ b/test/adaptive_loss_tests.jl
@@ -1,15 +1,10 @@
-using Optimization, OptimizationOptimisers
-using Test, NeuralPDE
+using Optimization, OptimizationOptimisers, Test, NeuralPDE, Random, DomainSets, Lux
 import ModelingToolkit: Interval, infimum, supremum
-using DomainSets
-using Random
-import Lux
 
-nonadaptive_loss = NeuralPDE.NonAdaptiveLoss(pde_loss_weights = 1, bc_loss_weights = 1)
-gradnormadaptive_loss = NeuralPDE.GradientScaleAdaptiveLoss(100, pde_loss_weights = 1e3,
-    bc_loss_weights = 1)
-adaptive_loss = NeuralPDE.MiniMaxAdaptiveLoss(100; pde_loss_weights = 1,
+nonadaptive_loss = NonAdaptiveLoss(pde_loss_weights = 1, bc_loss_weights = 1)
+gradnormadaptive_loss = GradientScaleAdaptiveLoss(100, pde_loss_weights = 1e3,
     bc_loss_weights = 1)
+adaptive_loss = MiniMaxAdaptiveLoss(100; pde_loss_weights = 1, bc_loss_weights = 1)
 adaptive_losses = [nonadaptive_loss, gradnormadaptive_loss, adaptive_loss]
 maxiters = 4000
 seed = 60
@@ -17,11 +12,11 @@ seed = 60
 ## 2D Poisson equation
 function test_2d_poisson_equation_adaptive_loss(adaptive_loss; seed = 60, maxiters = 4000)
     Random.seed!(seed)
-    hid = 40
-    chain_ = Lux.Chain(Lux.Dense(2, hid, Lux.σ), Lux.Dense(hid, hid, Lux.σ),
-        Lux.Dense(hid, 1))
-    strategy_ = NeuralPDE.StochasticTraining(256)
-    @info "adaptive reweighting test outdir:, maxiters: $(maxiters), 2D Poisson equation, adaptive_loss: $(nameof(typeof(adaptive_loss))) "
+    hid = 32
+    chain_ = Chain(Dense(2, hid, tanh), Dense(hid, hid, tanh), Dense(hid, 1))
+
+    strategy_ = StochasticTraining(256)
+
     @parameters x y
     @variables u(..)
     Dxx = Differential(x)^2
@@ -38,11 +33,8 @@ function test_2d_poisson_equation_adaptive_loss(adaptive_loss; seed = 60, maxite
         y ∈ Interval(0.0, 1.0)]
 
     iteration = [0]
-    discretization = PhysicsInformedNN(chain_,
-        strategy_;
-        adaptive_loss = adaptive_loss,
-        logger = nothing,
-        iteration = iteration)
+    discretization = PhysicsInformedNN(chain_, strategy_; adaptive_loss, logger = nothing,
+        iteration)
 
     @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
     prob = discretize(pde_system, discretization)
@@ -53,36 +45,24 @@ function test_2d_poisson_equation_adaptive_loss(adaptive_loss; seed = 60, maxite
         (length(xs), length(ys)))
 
     callback = function (p, l)
-        iteration[1] += 1
-        if iteration[1] % 100 == 0
-            @info "Current loss is: $l, iteration is $(iteration[1])"
+        iteration[] += 1
+        if iteration[] % 100 == 0
+            @info "Current loss is: $l, iteration is $(iteration[])"
         end
         return false
     end
-    res = solve(
-        prob, OptimizationOptimisers.Adam(0.03); maxiters = maxiters, callback = callback)
+    res = solve(prob, OptimizationOptimisers.Adam(0.03); maxiters, callback)
     u_predict = reshape([first(phi([x, y], res.u)) for x in xs for y in ys],
         (length(xs), length(ys)))
-    diff_u = abs.(u_predict .- u_real)
-    total_diff = sum(diff_u)
-    total_u = sum(abs.(u_real))
+    total_diff = sum(abs, u_predict .- u_real)
+    total_u = sum(abs, u_real)
     total_diff_rel = total_diff / total_u
-    (error = total_diff, total_diff_rel = total_diff_rel)
+    return (; error = total_diff, total_diff_rel)
 end
 
-@info "testing that the adaptive loss methods roughly succeed"
-function test_2d_poisson_equation_adaptive_loss_no_logs_run_seediters(adaptive_loss)
-    test_2d_poisson_equation_adaptive_loss(adaptive_loss; seed = seed, maxiters = maxiters)
-end
-error_results_no_logs = map(test_2d_poisson_equation_adaptive_loss_no_logs_run_seediters,
-    adaptive_losses)
+@testset "$(nameof(typeof(adaptive_loss)))" for adaptive_loss in adaptive_losses
+    error_results_no_logs = test_2d_poisson_equation_adaptive_loss(
+        adaptive_loss; seed, maxiters)
 
-# accuracy tests
-@show error_results_no_logs[1][:total_diff_rel]
-@show error_results_no_logs[2][:total_diff_rel]
-@show error_results_no_logs[3][:total_diff_rel]
-# accuracy tests, these work for this specific seed but might not for others
-# note that this doesn't test that the adaptive losses are outperforming the nonadaptive loss, which is not guaranteed, and seed/arch/hyperparam/pde etc dependent
-@test error_results_no_logs[1][:total_diff_rel] < 0.4
-@test error_results_no_logs[2][:total_diff_rel] < 0.4
-@test error_results_no_logs[3][:total_diff_rel] < 0.4
+    @test error_results_no_logs[:total_diff_rel] < 0.4
+end
diff --git a/test/additional_loss_tests.jl b/test/additional_loss_tests.jl
index 3223c66620..25e67466af 100644
--- a/test/additional_loss_tests.jl
+++ b/test/additional_loss_tests.jl
@@ -1,12 +1,7 @@
-using NeuralPDE, Test
-using Optimization, OptimizationOptimJL, OptimizationOptimisers
-using QuasiMonteCarlo, Random
+using NeuralPDE, Test, Optimization, OptimizationOptimJL, OptimizationOptimisers,
+      QuasiMonteCarlo, Random, DomainSets, Integrals, Cubature, OrdinaryDiffEq,
+      ComponentArrays, Lux
 import ModelingToolkit: Interval, infimum, supremum
-using DomainSets
-using Integrals, Cubature
-using OrdinaryDiffEq, ComponentArrays
-import Lux
-using ComponentArrays
 
 @testset "Fokker-Planck" begin
     # the example took from this article https://arxiv.org/abs/1910.10503
@@ -20,7 +15,7 @@ using ComponentArrays
     # Discretization
     dx = 0.01
     # here we use normalization condition: dx*p(x) ~ 1, in order to get non-zero solution.
-    #(α - 3*β*x^2)*p(x) + (α*x - β*x^3)*Dx(p(x)) ~ (_σ^2/2)*Dxx(p(x))
+    # (α - 3*β*x^2)*p(x) + (α*x - β*x^3)*Dx(p(x)) ~ (_σ^2/2)*Dxx(p(x))
     eq = [Dx((α * x - β * x^3) * p(x)) ~ (_σ^2 / 2) * Dxx(p(x))]
     x_0 = -2.2
     x_end = 2.2
@@ -32,11 +27,9 @@ using ComponentArrays
 
     # Neural network
     inn = 18
-    chain = Lux.Chain(Lux.Dense(1, inn, Lux.σ),
-        Lux.Dense(inn, inn, Lux.σ),
-        Lux.Dense(inn, inn, Lux.σ),
-        Lux.Dense(inn, 1))
-    init_params = Float64.(ComponentArray(Lux.setup(Random.default_rng(), chain)[1]))
+    chain = Chain(Dense(1, inn, σ), Dense(inn, inn, σ), Dense(inn, inn, σ), Dense(inn, 1))
+    init_params = ComponentArray{Float64}(Lux.initialparameters(
+        Random.default_rng(), chain))
     lb = [x_0]
     ub = [x_end]
     function norm_loss_function(phi, θ, p)
@@ -45,7 +38,7 @@ using ComponentArrays
         end
         prob1 = IntegralProblem(inner_f, (lb, ub), θ)
         norm2 = solve(prob1, HCubatureJL(), reltol = 1e-8, abstol = 1e-8, maxiters = 10)
-        abs(norm2[1])
+        return abs(norm2[1])
     end
     discretization = PhysicsInformedNN(chain, GridTraining(dx); init_params = init_params,
         additional_loss = norm_loss_function)
@@ -113,8 +106,7 @@ end
 
     input_ = length(domains)
     n = 12
-    chain = [Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.σ),
-                 Lux.Dense(n, 1)) for _ in 1:3]
+    chain = [Chain(Dense(input_, n, tanh), Dense(n, n, σ), Dense(n, 1)) for _ in 1:3]
     #Generate Data
     function lorenz!(du, u, p, t)
         du[1] = 10.0 * (u[2] - u[1])
@@ -154,11 +146,8 @@ end
         for i in 1:1:3)
     end
 
-    discretization = PhysicsInformedNN(chain,
-        GridTraining(dt);
-        init_params = flat_init_params,
-        param_estim = true,
-        additional_loss = additional_loss)
+    discretization = PhysicsInformedNN(chain, GridTraining(dt);
+        init_params = flat_init_params, param_estim = true, additional_loss)
 
     additional_loss(discretization.phi, flat_init_params, nothing)
     @named pde_system = PDESystem(eqs, bcs, domains,
@@ -167,9 +156,7 @@ end
     prob = discretize(pde_system, discretization)
     sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization)
     sym_prob.loss_functions.full_loss_function(
-        ComponentArray(depvar = flat_init_params,
-            p = ones(3)),
-        Float64[])
+        ComponentArray(depvar = flat_init_params, p = ones(3)), Float64[])
 
     res = solve(prob, OptimizationOptimJL.BFGS(); maxiters = 6000)
     p_ = res.u[(end - 2):end]
@@ -178,10 +165,8 @@ end
     @test sum(abs2, p_[3] - (8 / 3)) < 0.1
 
     ### No init_params
-    discretization = PhysicsInformedNN(chain,
-        GridTraining(dt);
-        param_estim = true,
-        additional_loss = additional_loss)
+    discretization = PhysicsInformedNN(
+        chain, GridTraining(dt); param_estim = true, additional_loss)
 
     additional_loss(discretization.phi, flat_init_params, nothing)
     @named pde_system = PDESystem(eqs, bcs, domains,
@@ -207,10 +192,8 @@ end
     dx = pi / 10
     domain = [x ∈ Interval(x0, x_end)]
     hidden = 10
-    chain = Lux.Chain(Lux.Dense(1, hidden, Lux.tanh),
-        Lux.Dense(hidden, hidden, Lux.sin),
-        Lux.Dense(hidden, hidden, Lux.tanh),
-        Lux.Dense(hidden, 1))
+    chain = Chain(Dense(1, hidden, tanh), Dense(hidden, hidden, sin),
+        Dense(hidden, hidden, tanh), Dense(hidden, 1))
     strategy = GridTraining(dx)
     xs = collect(x0:dx:x_end)'
     aproxf_(x) = @. cos(pi * x)
diff --git a/test/dgm_test.jl b/test/dgm_test.jl
index de29888f96..2d458ec39c 100644
--- a/test/dgm_test.jl
+++ b/test/dgm_test.jl
@@ -1,9 +1,8 @@
 using NeuralPDE, Test
 
 using ModelingToolkit, Optimization, OptimizationOptimisers, Distributions, MethodOfLines,
-      OrdinaryDiffEq
+      OrdinaryDiffEq, LinearAlgebra
 import ModelingToolkit: Interval, infimum, supremum
-import Lux: tanh, identity
 
 @testset "Poisson's equation" begin
     @parameters x y
@@ -26,18 +25,16 @@ import Lux: tanh, identity
     @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
     prob = discretize(pde_system, discretization)
 
-    global iter = 0
     callback = function (p, l)
-        global iter += 1
-        if iter % 50 == 0
-            println("$iter => $l")
-        end
+        p.iter % 50 == 0 && println("$(p.iter) => $l")
         return false
     end
 
-    res = Optimization.solve(prob, Adam(0.01); callback = callback, maxiters = 500)
+    res = Optimization.solve(
+        prob, OptimizationOptimisers.Adam(0.01); callback, maxiters = 500)
     prob = remake(prob, u0 = res.u)
-    res = Optimization.solve(prob, Adam(0.001); callback = callback, maxiters = 200)
+    res = Optimization.solve(
+        prob, OptimizationOptimisers.Adam(0.001); callback, maxiters = 200)
     phi = discretization.phi
 
     xs, ys = [infimum(d.domain):0.01:supremum(d.domain) for d in domains]
@@ -47,7 +44,8 @@ import Lux: tanh, identity
         (length(xs), length(ys)))
     u_real = reshape([analytic_sol_func(x, y) for x in xs for y in ys],
         (length(xs), length(ys)))
-    @test u_predict≈u_real atol=0.1
+
+    @test u_real≈u_predict atol=0.4
 end
 
 @testset "Black-Scholes PDE: European Call Option" begin
@@ -78,18 +76,14 @@ end
     @named pde_system = PDESystem(eq, bcs, domains, [t, x], [g(t, x)])
     prob = discretize(pde_system, discretization)
 
-    global iter = 0
     callback = function (p, l)
-        global iter += 1
-        if iter % 50 == 0
-            println("$iter => $l")
-        end
+        p.iter % 50 == 0 && println("$(p.iter) => $l")
         return false
     end
 
-    res = Optimization.solve(prob, Adam(0.1); callback = callback, maxiters = 100)
+    res = Optimization.solve(prob, Adam(0.1); callback, maxiters = 100)
     prob = remake(prob, u0 = res.u)
-    res = Optimization.solve(prob, Adam(0.01); callback = callback, maxiters = 500)
+    res = Optimization.solve(prob, Adam(0.01); callback, maxiters = 500)
     phi = discretization.phi
 
     function analytical_soln(t, x, K, σ, T)
@@ -143,12 +137,9 @@ end
     discretization = DeepGalerkin(2, 1, 50, 5, tanh, tanh, identity, strategy)
     @named pde_system = PDESystem(eq, bcs, domains, [t, x], [u(t, x)])
     prob = discretize(pde_system, discretization)
-    global iter = 0
+
     callback = function (p, l)
-        global iter += 1
-        if iter % 20 == 0
-            println("$iter => $l")
-        end
+        p.iter % 50 == 0 && println("$(p.iter) => $l")
         return false
     end
 
@@ -159,5 +150,5 @@ end
 
     u_predict = [first(phi([t, x], res.u)) for t in ts, x in xs]
 
-    @test u_predict≈u_MOL rtol=0.025
+    @test u_predict≈u_MOL rtol=0.1
 end
diff --git a/test/direct_function_tests.jl b/test/direct_function_tests.jl
index 529c0fe64d..a4488296c1 100644
--- a/test/direct_function_tests.jl
+++ b/test/direct_function_tests.jl
@@ -1,10 +1,6 @@
-using NeuralPDE, Test
-using Optimization, OptimizationOptimJL, OptimizationOptimisers
-using QuasiMonteCarlo
+using NeuralPDE, Test, Optimization, OptimizationOptimJL, OptimizationOptimisers,
+      QuasiMonteCarlo, DomainSets, Random, Lux, Optimisers
 import ModelingToolkit: Interval, infimum, supremum
-using DomainSets
-using Random
-import Lux
 
 Random.seed!(110)
 
@@ -26,15 +22,13 @@ Random.seed!(110)
     func_s = func(xs)
 
     hidden = 10
-    chain = Lux.Chain(Lux.Dense(1, hidden, Lux.tanh),
-        Lux.Dense(hidden, hidden, Lux.tanh),
-        Lux.Dense(hidden, 1))
+    chain = Chain(Dense(1, hidden, tanh), Dense(hidden, hidden, tanh), Dense(hidden, 1))
 
     strategy = GridTraining(0.01)
     discretization = PhysicsInformedNN(chain, strategy)
     @named pde_system = PDESystem(eq, bc, domain, [x], [u(x)])
     prob = discretize(pde_system, discretization)
-    res = solve(prob, OptimizationOptimisers.Adam(0.05), maxiters = 1000)
+    res = solve(prob, Optimisers.Adam(0.05), maxiters = 1000)
     prob = remake(prob, u0 = res.u)
     res = solve(prob, OptimizationOptimJL.BFGS(initial_stepnorm = 0.01), maxiters = 500)
     @test discretization.phi(xs', res.u)≈func(xs') rtol=0.01
@@ -52,10 +46,8 @@ end
     domain = [x ∈ Interval(x0, x_end)]
 
     hidden = 20
-    chain = Lux.Chain(Lux.Dense(1, hidden, Lux.sin),
-        Lux.Dense(hidden, hidden, Lux.sin),
-        Lux.Dense(hidden, hidden, Lux.sin),
-        Lux.Dense(hidden, 1))
+    chain = Chain(Dense(1, hidden, sin), Dense(hidden, hidden, sin),
+        Dense(hidden, hidden, sin), Dense(hidden, 1))
 
     strategy = GridTraining(0.01)
     discretization = PhysicsInformedNN(chain, strategy)
@@ -83,10 +75,8 @@ end
     d = 0.4
     domain = [x ∈ Interval(x0, x_end), y ∈ Interval(y0, y_end)]
     hidden = 25
-    chain = Lux.Chain(Lux.Dense(2, hidden, Lux.tanh),
-        Lux.Dense(hidden, hidden, Lux.tanh),
-        Lux.Dense(hidden, hidden, Lux.tanh),
-        Lux.Dense(hidden, 1))
+    chain = Chain(Dense(2, hidden, tanh), Dense(hidden, hidden, tanh),
+        Dense(hidden, hidden, tanh), Dense(hidden, 1))
 
     strategy = GridTraining(d)
     discretization = PhysicsInformedNN(chain, strategy)
diff --git a/test/forward_tests.jl b/test/forward_tests.jl
index 95d061c05e..77ece61c7e 100644
--- a/test/forward_tests.jl
+++ b/test/forward_tests.jl
@@ -1,9 +1,5 @@
-using Test, NeuralPDE
-using SciMLBase
-using DomainSets
+using Test, NeuralPDE, SciMLBase, DomainSets, Lux, Random, Zygote, ComponentArrays, Adapt
 import ModelingToolkit: Interval
-import Lux, Random, Zygote
-using ComponentArrays
 
 @testset "ODE" begin
     @parameters x
@@ -13,13 +9,13 @@ using ComponentArrays
     eq = Dx(u(x)) ~ 0.0
     bcs = [u(0.0) ~ u(0.0)]
     domains = [x ∈ Interval(0.0, 1.0)]
-    chain = Lux.Chain(x -> x .^ 2)
+    chain = Chain(x -> x .^ 2)
     init_params, st = Lux.setup(Random.default_rng(), chain)
-    init_params = Float64[]
+    init_params = init_params |> ComponentArray{Float64}
 
-    chain([1], Float64[], st)
+    chain([1], init_params, st)
     strategy_ = GridTraining(0.1)
-    discretization = PhysicsInformedNN(chain, strategy_; init_params = Float64[])
+    discretization = PhysicsInformedNN(chain, strategy_; init_params)
     @named pde_system = PDESystem(eq, bcs, domains, [x], [u(x)])
     prob = discretize(pde_system, discretization)
     sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization)
@@ -30,26 +26,24 @@ using ComponentArrays
     dx = strategy_.dx
     eltypeθ = eltype(sym_prob.flat_init_params)
     depvars, indvars, dict_indvars, dict_depvars, dict_depvar_input = NeuralPDE.get_vars(
-        pde_system.ivs,
-        pde_system.dvs)
+        pde_system.ivs, pde_system.dvs)
 
     train_sets = generate_training_sets(domains, dx, eqs, bcs, eltypeθ,
         dict_indvars, dict_depvars)
 
     pde_train_sets, bcs_train_sets = train_sets
-    pde_train_sets = NeuralPDE.adapt(eltypeθ, pde_train_sets)[1]
+    pde_train_sets = Adapt.adapt(eltypeθ, pde_train_sets)[1]
 
     train_data = pde_train_sets
     pde_loss_function = sym_prob.loss_functions.datafree_pde_loss_functions[1]
 
     dudx(x) = @. 2 * x
-    @test pde_loss_function(train_data, Float64[])≈dudx(train_data) rtol=1e-8
+    @test pde_loss_function(train_data, init_params)≈dudx(train_data) rtol=1e-8
 end
 
 @testset "derivatives" begin
-    chain = Lux.Chain(Lux.Dense(2, 16, Lux.σ), Lux.Dense(16, 16, Lux.σ),
-        Lux.Dense(16, 1))
-    init_params = Lux.setup(Random.default_rng(), chain)[1] |> ComponentArray .|> Float64
+    chain = Chain(Dense(2, 16, σ), Dense(16, 16, σ), Dense(16, 1))
+    init_params = Lux.setup(Random.default_rng(), chain)[1] |> ComponentArray{Float64}
 
     eltypeθ = eltype(init_params)
     phi = NeuralPDE.Phi(chain)
@@ -88,14 +82,13 @@ end
 end
 
 @testset "Integral" begin
-    #semi-infinite intervals
     @parameters x
     @variables u(..)
     I = Integral(x in ClosedInterval(0, Inf))
     eq = I(u(x)) ~ 0
     bcs = [u(1.0) ~ exp(1) / (exp(2) + 3)]
     domains = [x ∈ Interval(1.0, 2.0)]
-    chain = Lux.Chain(x -> exp.(x) ./ (exp.(2 .* x) .+ 3))
+    chain = Chain(x -> exp.(x) ./ (exp.(2 .* x) .+ 3))
     init_params, st = Lux.setup(Random.default_rng(), chain)
     chain([1], init_params, st)
     strategy_ = GridTraining(0.1)
@@ -115,7 +108,7 @@ end
     eqs = I(u(x)) ~ 0
     domains = [x ∈ Interval(1.0, 2.0)]
     bcs = [u(1) ~ u(1)]
-    chain = Lux.Chain(x -> x .* exp.(-x .^ 2))
+    chain = Chain(x -> x .* exp.(-x .^ 2))
     chain([1], init_params, st)
 
     discretization = PhysicsInformedNN(chain, strategy_;
@@ -125,5 +118,5 @@ end
     prob = discretize(pde_system, discretization)
     inner_loss = sym_prob.loss_functions.datafree_pde_loss_functions[1]
     exact_u = 0
-    @test inner_loss(ones(1, 1), init_params)[1]≈exact_u rtol=1e-9
+    @test inner_loss(ones(1, 1), init_params)[1]≈exact_u atol=1e-13
 end
diff --git a/test/logging_tests.jl b/test/logging_tests.jl
new file mode 100644
index 0000000000..36add38a37
--- /dev/null
+++ b/test/logging_tests.jl
@@ -0,0 +1,102 @@
+using Test, NeuralPDE, Optimization, OptimizationOptimisers, Random, Lux
+import ModelingToolkit: Interval, infimum, supremum
+
+nonadaptive_loss = NonAdaptiveLoss(pde_loss_weights = 1, bc_loss_weights = 1)
+gradnormadaptive_loss = GradientScaleAdaptiveLoss(100, pde_loss_weights = 1e3,
+    bc_loss_weights = 1)
+adaptive_loss = MiniMaxAdaptiveLoss(100; pde_loss_weights = 1, bc_loss_weights = 1)
+adaptive_losses = [nonadaptive_loss, gradnormadaptive_loss, adaptive_loss]
+
+possible_logger_dir = mktempdir()
+if ENV["LOG_SETTING"] == "NoImport"
+    haslogger = false
+    expected_log_folders = 0
+elseif ENV["LOG_SETTING"] == "ImportNoUse"
+    using TensorBoardLogger
+    haslogger = false
+    expected_log_folders = 0
+elseif ENV["LOG_SETTING"] == "ImportUse"
+    using TensorBoardLogger
+    haslogger = true
+    expected_log_folders = 3
+end
+
+@info "has logger: $(haslogger), expected log folders: $(expected_log_folders)"
+
+function test_2d_poisson_equation_adaptive_loss(adaptive_loss, run, outdir, haslogger;
+        seed = 60, maxiters = 800)
+    logdir = joinpath(outdir, string(run))
+    logger = haslogger ? TBLogger(logdir) : nothing
+
+    Random.seed!(seed)
+    hid = 40
+    chain_ = Chain(Dense(2, hid, σ), Dense(hid, hid, σ), Dense(hid, 1))
+    strategy_ = StochasticTraining(256)
+
+    @parameters x y
+    @variables u(..)
+    Dxx = Differential(x)^2
+    Dyy = Differential(y)^2
+
+    # 2D PDE
+    eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sinpi(x) * sinpi(y)
+
+    # Initial and boundary conditions
+    bcs = [u(0, y) ~ 0.0, u(1, y) ~ -sinpi(1) * sinpi(y),
+        u(x, 0) ~ 0.0, u(x, 1) ~ -sinpi(x) * sinpi(1)]
+    # Space and time domains
+    domains = [x ∈ Interval(0.0, 1.0), y ∈ Interval(0.0, 1.0)]
+
+    discretization = PhysicsInformedNN(chain_, strategy_; adaptive_loss, logger)
+
+    @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
+    prob = NeuralPDE.discretize(pde_system, discretization)
+    phi = discretization.phi
+
+    xs, ys = [infimum(d.domain):0.01:supremum(d.domain) for d in domains]
+    sz = (length(xs), length(ys))
+    analytic_sol_func(x, y) = (sinpi(x) * sinpi(y)) / (2pi^2)
+    u_real = reshape([analytic_sol_func(x, y) for x in xs for y in ys], sz)
+
+    callback = function (p, l)
+        if p.iter % 100 == 0
+            @info "Current loss is: $l, iteration is $(p.iter)"
+        end
+        if haslogger
+            log_value(logger, "outer_error/loss", l, step = p.iter)
+            if p.iter % 30 == 0
+                u_predict = reshape([first(phi([x, y], p.u)) for x in xs for y in ys],
+                    (length(xs), length(ys)))
+                total_diff = sum(abs, u_predict .- u_real)
+                log_value(logger, "outer_error/total_diff", total_diff, step = p.iter)
+                log_value(logger, "outer_error/total_diff_rel",
+                    total_diff / sum(abs2, u_real), step = p.iter)
+                log_value(logger, "outer_error/total_diff_sq",
+                    sum(abs2, u_predict .- u_real), step = p.iter)
+            end
+        end
+        return false
+    end
+    res = solve(prob, OptimizationOptimisers.Adam(0.03); maxiters, callback)
+
+    u_predict = reshape([first(phi([x, y], res.u)) for x in xs for y in ys], sz)
+    diff_u = abs.(u_predict .- u_real)
+    total_diff = sum(diff_u)
+    total_u = sum(abs.(u_real))
+    total_diff_rel = total_diff / total_u
+
+    return (error = total_diff, total_diff_rel = total_diff_rel)
+end
+
+@testset "$(nameof(typeof(adaptive_loss)))" for (i, adaptive_loss) in enumerate(adaptive_losses)
+    test_2d_poisson_equation_adaptive_loss(adaptive_loss, i, possible_logger_dir,
+        haslogger; seed = 60, maxiters = 800)
+end
+
+@test length(readdir(possible_logger_dir)) == expected_log_folders
+if expected_log_folders > 0
+    @info "dirs at $(possible_logger_dir): $(string(readdir(possible_logger_dir)))"
+    for logdir in readdir(possible_logger_dir)
+        @test length(readdir(joinpath(possible_logger_dir, logdir))) > 0
+    end
+end
diff --git a/test/neural_adapter_tests.jl b/test/neural_adapter_tests.jl
index bf7316fe91..609df34c29 100644
--- a/test/neural_adapter_tests.jl
+++ b/test/neural_adapter_tests.jl
@@ -1,15 +1,12 @@
-using Test, NeuralPDE
-using Optimization
+using Test, NeuralPDE, Optimization, Lux, OptimizationOptimisers, Statistics,
+      ComponentArrays, Random, LinearAlgebra
 import ModelingToolkit: Interval, infimum, supremum
-import Lux, OptimizationOptimisers
-using Statistics
-using ComponentArrays
 
-using Random
 Random.seed!(100)
 
 callback = function (p, l)
-    println("Current loss is: $l")
+    (p.iter == 1 || p.iter % 500 == 0) &&
+        println("Current loss is: $l after $(p.iter) iterations")
     return false
 end
 
@@ -20,45 +17,45 @@ end
     Dyy = Differential(y)^2
 
     # 2D PDE
-    eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y)
+    eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sinpi(x) * sinpi(y)
 
     # Initial and boundary conditions
-    bcs = [u(0, y) ~ 0.0, u(1, y) ~ -sin(pi * 1) * sin(pi * y),
-        u(x, 0) ~ 0.0, u(x, 1) ~ -sin(pi * x) * sin(pi * 1)]
+    bcs = [
+        u(0, y) ~ 0.0,
+        u(1, y) ~ -sinpi(1) * sinpi(y),
+        u(x, 0) ~ 0.0,
+        u(x, 1) ~ -sinpi(x) * sinpi(1)
+    ]
     # Space and time domains
-    domains = [x ∈ Interval(0.0, 1.0),
-        y ∈ Interval(0.0, 1.0)]
-    quadrature_strategy = NeuralPDE.QuadratureTraining(reltol = 1e-3, abstol = 1e-6,
-        maxiters = 50, batch = 100)
+    domains = [x ∈ Interval(0.0, 1.0), y ∈ Interval(0.0, 1.0)]
+    quadrature_strategy = QuadratureTraining(
+        reltol = 1e-3, abstol = 1e-6, maxiters = 50, batch = 100)
     inner = 8
-    af = Lux.tanh
-    chain1 = Lux.Chain(Lux.Dense(2, inner, af),
-        Lux.Dense(inner, inner, af),
-        Lux.Dense(inner, 1))
-    init_params = Lux.setup(Random.default_rng(), chain1)[1] |> ComponentArray .|> Float64
-    discretization = NeuralPDE.PhysicsInformedNN(chain1,
-        quadrature_strategy;
-        init_params = init_params)
+    af = tanh
+    chain1 = Chain(Dense(2, inner, af), Dense(inner, inner, af), Dense(inner, 1))
+    discretization = PhysicsInformedNN(chain1, quadrature_strategy)
 
     @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
-    prob = NeuralPDE.discretize(pde_system, discretization)
+    prob = discretize(pde_system, discretization)
     println("Poisson equation, strategy: $(nameof(typeof(quadrature_strategy)))")
-    @time res = solve(prob, OptimizationOptimisers.Adam(5e-3); maxiters = 10000)
+    @time res = solve(prob, Optimisers.Adam(5e-3); callback, maxiters = 2000)
     phi = discretization.phi
 
+    xs, ys = [infimum(d.domain):0.01:supremum(d.domain) for d in domains]
+    analytic_sol_func(x, y) = (sinpi(x) * sinpi(y)) / (2pi^2)
+
+    u_predict = [first(phi([x, y], res.u)) for x in xs for y in ys]
+    u_real = [analytic_sol_func(x, y) for x in xs for y in ys]
+
+    @test u_predict≈u_real atol=5e-2 norm=Base.Fix2(norm, Inf)
+
     inner_ = 8
-    af = Lux.tanh
-    chain2 = Lux.Chain(Lux.Dense(2, inner_, af),
-        Lux.Dense(inner_, inner_, af),
-        Lux.Dense(inner_, inner_, af),
-        Lux.Dense(inner_, 1))
+    af = tanh
+    chain2 = Chain(Dense(2, inner_, af), Dense(inner_, inner_, af), Dense(inner_, 1))
     initp, st = Lux.setup(Random.default_rng(), chain2)
-    init_params2 = Float64.(ComponentArrays.ComponentArray(initp))
+    init_params2 = ComponentArray{Float64}(initp)
 
-    function loss(cord, θ)
-        ch2, st = chain2(cord, θ, st)
-        ch2 .- phi(cord, res.u)
-    end
+    loss(cord, θ) = first(chain2(cord, θ, st)) .- phi(cord, res.u)
 
     grid_strategy = GridTraining(0.05)
     quadrature_strategy = QuadratureTraining(
@@ -66,45 +63,16 @@ end
     stochastic_strategy = StochasticTraining(1000)
     quasirandom_strategy = QuasiRandomTraining(1000, minibatch = 200, resampling = true)
 
-    strategies1 = [grid_strategy, quadrature_strategy]
-    reses_1 = map(strategies1) do strategy_
-        println("Neural adapter Poisson equation, strategy: $(nameof(typeof(strategy_)))")
-        prob_ = NeuralPDE.neural_adapter(loss, init_params2, pde_system, strategy_)
-        @time res_ = solve(prob_, OptimizationOptimisers.Adam(5e-3); maxiters = 10000)
-    end
+    @testset "$(nameof(typeof(strategy_)))" for strategy_ in [
+        grid_strategy, quadrature_strategy, stochastic_strategy, quasirandom_strategy]
+        prob_ = neural_adapter(loss, init_params2, pde_system, strategy_)
+        @time res_ = solve(prob_, Optimisers.Adam(5e-3); callback, maxiters = 2000)
+        discretization = PhysicsInformedNN(chain2, strategy_; init_params = res_.u)
+        phi_ = discretization.phi
 
-    strategies2 = [stochastic_strategy, quasirandom_strategy]
-    reses_2 = map(strategies2) do strategy_
-        println("Neural adapter Poisson equation, strategy: $(nameof(typeof(strategy_)))")
-        prob_ = NeuralPDE.neural_adapter(loss, init_params2, pde_system, strategy_)
-        @time res_ = solve(prob_, OptimizationOptimisers.Adam(5e-3); maxiters = 10000)
+        u_predict_ = [first(phi_([x, y], res_.u)) for x in xs for y in ys]
+        @test u_predict_≈u_real atol=5e-2 norm=Base.Fix2(norm, Inf)
     end
-
-    reses_ = [reses_1; reses_2]
-    discretizations = map(
-        res_ -> PhysicsInformedNN(chain2, grid_strategy; init_params = res_.u), reses_)
-    probs = map(discret -> discretize(pde_system, discret), discretizations)
-    phis = map(discret -> discret.phi, discretizations)
-
-    xs, ys = [infimum(d.domain):0.01:supremum(d.domain) for d in domains]
-    analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2)
-
-    u_predict = reshape([first(phi([x, y], res.u)) for x in xs for y in ys],
-        (length(xs), length(ys)))
-
-    u_predicts = map(zip(phis, reses_)) do (phi_, res_)
-        reshape([first(phi_([x, y], res_.u)) for x in xs for y in ys],
-            (length(xs), length(ys)))
-    end
-
-    u_real = reshape([analytic_sol_func(x, y) for x in xs for y in ys],
-        (length(xs), length(ys)))
-
-    @test u_predict≈u_real rtol=1e-1
-    @test u_predicts[1]≈u_real rtol=1e-1
-    @test u_predicts[2]≈u_real rtol=1e-1
-    @test u_predicts[3]≈u_real rtol=1e-1
-    @test u_predicts[4]≈u_real rtol=1e-1
 end
 
 @testset "Example, 2D Poisson equation, domain decomposition" begin
@@ -113,10 +81,10 @@ end
     Dxx = Differential(x)^2
     Dyy = Differential(y)^2
 
-    eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y)
+    eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sinpi(x) * sinpi(y)
 
-    bcs = [u(0, y) ~ 0.0, u(1, y) ~ -sin(pi * 1) * sin(pi * y),
-        u(x, 0) ~ 0.0, u(x, 1) ~ -sin(pi * x) * sin(pi * 1)]
+    bcs = [u(0, y) ~ 0.0, u(1, y) ~ -sinpi(1) * sinpi(y),
+        u(x, 0) ~ 0.0, u(x, 1) ~ -sinpi(x) * sinpi(1)]
 
     # Space
     x_0 = 0.0
@@ -127,37 +95,28 @@ end
     count_decomp = 10
 
     # Neural network
-    af = Lux.tanh
+    af = tanh
     inner = 12
-    chains = [Lux.Chain(Lux.Dense(2, inner, af), Lux.Dense(inner, inner, af),
-                  Lux.Dense(inner, 1)) for _ in 1:count_decomp]
-    init_params = map(
-        c -> Float64.(ComponentArrays.ComponentArray(Lux.setup(Random.default_rng(),
-            c)[1])),
-        chains)
+    chains = [Chain(Dense(2, inner, af), Dense(inner, inner, af), Dense(inner, 1))
+              for _ in 1:count_decomp]
 
     xs_ = infimum(x_domain):(1 / count_decomp):supremum(x_domain)
     xs_domain = [(xs_[i], xs_[i + 1]) for i in 1:(length(xs_) - 1)]
     domains_map = map(xs_domain) do (xs_dom)
         x_domain_ = Interval(xs_dom...)
-        domains_ = [x ∈ x_domain_,
-            y ∈ y_domain]
+        domains_ = [x ∈ x_domain_, y ∈ y_domain]
     end
 
-    analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2)
+    analytic_sol_func(x, y) = (sinpi(x) * sinpi(y)) / (2pi^2)
     function create_bcs(x_domain_, phi_bound)
         x_0, x_e = x_domain_.left, x_domain_.right
         if x_0 == 0.0
-            bcs = [u(0, y) ~ 0.0,
-                u(x_e, y) ~ analytic_sol_func(x_e, y),
-                u(x, 0) ~ 0.0,
-                u(x, 1) ~ -sin(pi * x) * sin(pi * 1)]
+            bcs = [u(0, y) ~ 0.0, u(x_e, y) ~ analytic_sol_func(x_e, y),
+                u(x, 0) ~ 0.0, u(x, 1) ~ -sinpi(x) * sinpi(1)]
             return bcs
         end
-        bcs = [u(x_0, y) ~ phi_bound(x_0, y),
-            u(x_e, y) ~ analytic_sol_func(x_e, y),
-            u(x, 0) ~ 0.0,
-            u(x, 1) ~ -sin(pi * x) * sin(pi * 1)]
+        bcs = [u(x_0, y) ~ phi_bound(x_0, y), u(x_e, y) ~ analytic_sol_func(x_e, y),
+            u(x, 0) ~ 0.0, u(x, 1) ~ -sinpi(x) * sinpi(1)]
         bcs
     end
 
@@ -167,6 +126,7 @@ end
 
     for i in 1:count_decomp
         println("decomposition $i")
+
         domains_ = domains_map[i]
         phi_in(cord) = phis[i - 1](cord, reses[i - 1].u)
         phi_bound(x, y) = phi_in(vcat(x, y))
@@ -176,13 +136,12 @@ end
         @named pde_system_ = PDESystem(eq, bcs_, domains_, [x, y], [u(x, y)])
         push!(pde_system_map, pde_system_)
         strategy = GridTraining([0.1 / count_decomp, 0.1])
-        discretization = PhysicsInformedNN(
-            chains[i], strategy; init_params = init_params[i])
+        discretization = PhysicsInformedNN(chains[i], strategy)
         prob = discretize(pde_system_, discretization)
-        @time res_ = Optimization.solve(
-            prob, OptimizationOptimisers.Adam(5e-3), maxiters = 10000)
+        @time res_ = solve(prob, Optimisers.Adam(5e-3); callback, maxiters = 2000)
         @show res_.objective
         phi = discretization.phi
+
         push!(reses, res_)
         push!(phis, phi)
     end
@@ -217,42 +176,35 @@ end
     u_predict, diff_u = compose_result(dx)
 
     inner_ = 18
-    af = Lux.tanh
-    chain2 = Lux.Chain(Lux.Dense(2, inner_, af),
-        Lux.Dense(inner_, inner_, af),
-        Lux.Dense(inner_, inner_, af),
-        Lux.Dense(inner_, inner_, af),
-        Lux.Dense(inner_, 1))
+    af = tanh
+    chain2 = Chain(Dense(2, inner_, af), Dense(inner_, inner_, af),
+        Dense(inner_, inner_, af), Dense(inner_, inner_, af), Dense(inner_, 1))
 
     initp, st = Lux.setup(Random.default_rng(), chain2)
-    init_params2 = Float64.(ComponentArrays.ComponentArray(initp))
+    init_params2 = ComponentArray{Float64}(initp)
 
     @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
 
     losses = map(1:count_decomp) do i
-        function loss(cord, θ)
-            ch2, st = chain2(cord, θ, st)
-            ch2 .- phis[i](cord, reses[i].u)
-        end
+        loss(cord, θ) = first(chain2(cord, θ, st)) .- phis[i](cord, reses[i].u)
     end
 
-    prob_ = NeuralPDE.neural_adapter(losses, init_params2, pde_system_map,
-        GridTraining([0.1 / count_decomp, 0.1]))
-    @time res_ = solve(prob_, OptimizationOptimisers.Adam(5e-3); maxiters = 5000)
+    prob_ = neural_adapter(
+        losses, init_params2, pde_system_map, GridTraining([0.1 / count_decomp, 0.1]))
+    @time res_ = solve(prob_, OptimizationOptimisers.Adam(5e-3); callback, maxiters = 2000)
     @show res_.objective
-    prob_ = NeuralPDE.neural_adapter(losses, res_.u, pde_system_map,
-        GridTraining(0.01))
-    @time res_ = solve(prob_, OptimizationOptimisers.Adam(5e-3); maxiters = 5000)
+    prob_ = neural_adapter(losses, res_.u, pde_system_map, GridTraining(0.01))
+    @time res_ = solve(prob_, OptimizationOptimisers.Adam(5e-3); callback, maxiters = 2000)
     @show res_.objective
 
     phi_ = NeuralPDE.Phi(chain2)
     xs, ys = [infimum(d.domain):dx:supremum(d.domain) for d in domains]
-    u_predict_ = reshape([first(phi_([x, y], res_.u)) for x in xs for y in ys],
-        (length(xs), length(ys)))
-    u_real = reshape([analytic_sol_func(x, y) for x in xs for y in ys],
-        (length(xs), length(ys)))
+    u_predict_ = reshape(
+        [first(phi_([x, y], res_.u)) for x in xs for y in ys], (length(xs), length(ys)))
+    u_real = reshape(
+        [analytic_sol_func(x, y) for x in xs for y in ys], (length(xs), length(ys)))
     diff_u_ = u_predict_ .- u_real
 
-    @test u_predict≈u_real rtol=1e-1
-    @test u_predict_≈u_real rtol=1e-1
+    @test u_predict≈u_real atol=5e-2 norm=Base.Fix2(norm, Inf)
+    @test u_predict_≈u_real atol=5e-2 norm=Base.Fix2(norm, Inf)
 end
diff --git a/test/qa.jl b/test/qa.jl
index b8db350a84..9df0e603b2 100644
--- a/test/qa.jl
+++ b/test/qa.jl
@@ -1,11 +1,12 @@
-using NeuralPDE, Aqua
+using NeuralPDE, Aqua, ExplicitImports
+
 @testset "Aqua" begin
-    Aqua.find_persistent_tasks_deps(NeuralPDE)
+    Aqua.test_all(NeuralPDE; ambiguities = false)
     Aqua.test_ambiguities(NeuralPDE, recursive = false)
-    Aqua.test_deps_compat(NeuralPDE)
-    Aqua.test_piracies(NeuralPDE)
-    Aqua.test_project_extras(NeuralPDE)
-    Aqua.test_stale_deps(NeuralPDE)
-    Aqua.test_unbound_args(NeuralPDE)
-    Aqua.test_undefined_exports(NeuralPDE)
+end
+
+@testset "ExplicitImports" begin
+    @test check_no_implicit_imports(NeuralPDE) === nothing
+    @test check_no_stale_explicit_imports(NeuralPDE) === nothing
+    @test check_all_qualified_accesses_via_owners(NeuralPDE) === nothing
 end
diff --git a/test/runtests.jl b/test/runtests.jl
index e6248eae60..16ebea0e05 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,103 +1,64 @@
-using Pkg
-using SafeTestsets
+using Pkg, SafeTestsets, Test
 
 const GROUP = get(ENV, "GROUP", "All")
 
-const is_APPVEYOR = Sys.iswindows() && haskey(ENV, "APPVEYOR")
-
-function dev_subpkg(subpkg)
-    subpkg_path = joinpath(dirname(@__DIR__), "lib", subpkg)
-    Pkg.develop(PackageSpec(path = subpkg_path))
-end
-
 @time begin
     if GROUP == "All" || GROUP == "QA"
-        @time @safetestset "Quality Assurance" begin
-            include("qa.jl")
-        end
+        @time @safetestset "Quality Assurance" include("qa.jl")
     end
+
     if GROUP == "All" || GROUP == "ODEBPINN"
-        @time @safetestset "Bpinn ODE solver" begin
-            include("BPINN_Tests.jl")
-        end
+        @time @safetestset "BPINN ODE solver" include("BPINN_Tests.jl")
     end
 
     if GROUP == "All" || GROUP == "PDEBPINN"
-        @time @safetestset "Bpinn PDE solver" begin
-            include("BPINN_PDE_tests.jl")
-        end
-        @time @safetestset "Bpinn PDE invaddloss solver" begin
-            include("BPINN_PDEinvsol_tests.jl")
-        end
+        @time @safetestset "BPINN PDE solver" include("BPINN_PDE_tests.jl")
+        @time @safetestset "BPINN PDE invaddloss solver" include("BPINN_PDEinvsol_tests.jl")
     end
 
     if GROUP == "All" || GROUP == "NNPDE1"
-        @time @safetestset "NNPDE" begin
-            include("NNPDE_tests.jl")
-        end
+        @time @safetestset "NNPDE" include("NNPDE_tests.jl")
     end
+
     if GROUP == "All" || GROUP == "NNODE"
-        @time @safetestset "NNODE" begin
-            include("NNODE_tests.jl")
-        end
-        @time @safetestset "NNODE_tstops" begin
-            include("NNODE_tstops_test.jl")
-        end
-        @time @safetestset "NNDAE" begin
-            include("NNDAE_tests.jl")
-        end
+        @time @safetestset "NNODE" include("NNODE_tests.jl")
+        @time @safetestset "NNODE_tstops" include("NNODE_tstops_test.jl")
+        @time @safetestset "NNDAE" include("NNDAE_tests.jl")
     end
 
     if GROUP == "All" || GROUP == "NNPDE2"
-        @time @safetestset "Additional Loss" begin
-            include("additional_loss_tests.jl")
-        end
-        @time @safetestset "Direction Function Approximation" begin
-            include("direct_function_tests.jl")
-        end
+        @time @safetestset "Additional Loss" include("additional_loss_tests.jl")
+        @time @safetestset "Direction Function Approximation" include("direct_function_tests.jl")
     end
+
     if GROUP == "All" || GROUP == "NeuralAdapter"
-        @time @safetestset "NeuralAdapter" begin
-            include("neural_adapter_tests.jl")
-        end
+        @time @safetestset "NeuralAdapter" include("neural_adapter_tests.jl")
     end
+
     if GROUP == "All" || GROUP == "IntegroDiff"
-        @time @safetestset "IntegroDiff" begin
-            include("IDE_tests.jl")
-        end
-    end
-    if GROUP == "All" || GROUP == "AdaptiveLoss"
-        @time @safetestset "AdaptiveLoss" begin
-            include("adaptive_loss_tests.jl")
-        end
+        @time @safetestset "IntegroDiff" include("IDE_tests.jl")
     end
 
-    #=
-    # Fails because it uses sciml_train
-    if GROUP == "All" || GROUP == "NNRODE"
-        @time @safetestset "NNRODE" begin include("NNRODE_tests.jl") end
+    if GROUP == "All" || GROUP == "AdaptiveLoss"
+        @time @safetestset "AdaptiveLoss" include("adaptive_loss_tests.jl")
     end
-    =#
 
     if GROUP == "All" || GROUP == "Forward"
-        @time @safetestset "Forward" begin
-            include("forward_tests.jl")
-        end
+        @time @safetestset "Forward" include("forward_tests.jl")
     end
+
     if GROUP == "All" || GROUP == "Logging"
-        dev_subpkg("NeuralPDELogging")
-        subpkg_path = joinpath(dirname(@__DIR__), "lib", "NeuralPDELogging")
-        Pkg.test(PackageSpec(name = "NeuralPDELogging", path = subpkg_path))
-    end
-    if !is_APPVEYOR && GROUP == "GPU"
-        @safetestset "NNPDE_gpu_Lux" begin
-            include("NNPDE_tests_gpu_Lux.jl")
+        @testset for log_setting in ["NoImport", "ImportNoUse", "ImportUse"]
+            ENV["LOG_SETTING"] = log_setting
+            @time @safetestset "Logging" include("logging_tests.jl")
         end
     end
 
+    if GROUP == "CUDA"
+        @safetestset "NNPDE_gpu_Lux" include("NNPDE_tests_gpu_Lux.jl")
+    end
+
     if GROUP == "All" || GROUP == "DGM"
-        @time @safetestset "Deep Galerkin solver" begin
-            include("dgm_test.jl")
-        end
+        @time @safetestset "Deep Galerkin solver" include("dgm_test.jl")
     end
 end

From 3846fd056358e6c1b7af2a88cdcb470401ecf0cf Mon Sep 17 00:00:00 2001
From: Anant Thazhemadam <anant.thazhemadam@gmail.com>
Date: Thu, 17 Oct 2024 10:58:27 +0530
Subject: [PATCH 053/107] ci: test with `1`, `lts` and `pre` versions of julia

---
 .github/workflows/Tests.yml | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/Tests.yml b/.github/workflows/Tests.yml
index b1b5ecd8f4..b759a860b2 100644
--- a/.github/workflows/Tests.yml
+++ b/.github/workflows/Tests.yml
@@ -24,7 +24,9 @@ jobs:
       fail-fast: false
       matrix:
         version:
-          - "1.10"
+          - "1"
+          - "lts"
+          - "pre"
         group:
           - "QA"
           - "ODEBPINN"
@@ -41,6 +43,6 @@ jobs:
     uses: "SciML/.github/.github/workflows/tests.yml@v1"
     with:
       group: "${{ matrix.group }}"
-      coverage-directories: "src,ext"
       julia-version: "${{ matrix.version }}"
+      coverage-directories: "src,ext"
     secrets: "inherit"

From 4a66eb89e43017a36b243e42e37adf2c019db26b Mon Sep 17 00:00:00 2001
From: Avik Pal <avik.pal.2017@gmail.com>
Date: Thu, 17 Oct 2024 21:26:26 -0400
Subject: [PATCH 054/107] ci: taming down CI timings (#903)

* docs: fix links

* test: run tests in parallel

* fix: patch for jacobian

* ci: remove Logging group

* test: remove redundant files

* test: rename file

* ci: remove Logging group

* ci: allow depwarn

* test: fix typo

* ci: cancel intermediate runs

* test: minor fixes

* test: more testing

* test: more test fixes

* test: adjust strategy to reduce runtime

* docs: simplify the examples
---
 .github/workflows/Downgrade.yml               |   6 +-
 .github/workflows/Tests.yml                   |   2 +-
 Project.toml                                  |  14 +-
 docs/make.jl                                  |   3 +-
 docs/src/index.md                             |   2 +-
 .../tutorials/derivative_neural_network.md    |  52 +--
 docs/src/tutorials/neural_adapter.md          | 105 +++--
 .../src/tutorials/ode_parameter_estimation.md |  22 +-
 docs/src/tutorials/param_estim.md             |   2 +-
 docs/src/tutorials/pdesystem.md               |  19 +-
 docs/src/tutorials/systems.md                 | 102 ++---
 src/ode_solve.jl                              |  11 +-
 src/pinn_types.jl                             |   6 +
 test/BPINN_PDE_tests.jl                       | 260 +++++++++---
 test/BPINN_PDEinvsol_tests.jl                 | 143 -------
 test/{BPINN_Tests.jl => BPINN_tests.jl}       |  54 ++-
 test/IDE_tests.jl                             | 147 ++++---
 test/NNDAE_tests.jl                           |  30 +-
 test/NNODE_tests.jl                           | 371 ++++++++++--------
 test/NNODE_tstops_test.jl                     |  43 --
 ...E_tests_gpu_Lux.jl => NNPDE_cuda_tests.jl} | 127 +++---
 test/NNPDE_tests.jl                           | 297 ++++++++------
 test/adaptive_loss_tests.jl                   | 130 ++++--
 test/additional_loss_tests.jl                 | 191 +++++----
 test/{dgm_test.jl => dgm_tests.jl}            |  49 ++-
 test/direct_function_tests.jl                 |  41 +-
 test/forward_tests.jl                         |  32 +-
 test/logging_tests.jl                         | 102 -----
 test/neural_adapter_tests.jl                  |  76 ++--
 test/{qa.jl => qa_tests.jl}                   |   8 +-
 test/runtests.jl                              |  72 +---
 31 files changed, 1311 insertions(+), 1208 deletions(-)
 delete mode 100644 test/BPINN_PDEinvsol_tests.jl
 rename test/{BPINN_Tests.jl => BPINN_tests.jl} (89%)
 delete mode 100644 test/NNODE_tstops_test.jl
 rename test/{NNPDE_tests_gpu_Lux.jl => NNPDE_cuda_tests.jl} (61%)
 rename test/{dgm_test.jl => dgm_tests.jl} (74%)
 delete mode 100644 test/logging_tests.jl
 rename test/{qa.jl => qa_tests.jl} (68%)

diff --git a/.github/workflows/Downgrade.yml b/.github/workflows/Downgrade.yml
index bcfab6b5d0..21c0898b08 100644
--- a/.github/workflows/Downgrade.yml
+++ b/.github/workflows/Downgrade.yml
@@ -10,6 +10,11 @@ on:
       - master
     paths-ignore:
       - 'docs/**'
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ github.ref_name != github.event.repository.default_branch || github.ref != 'refs/tags/v*' }}
+
 jobs:
   test:
     runs-on: ubuntu-latest
@@ -23,7 +28,6 @@ jobs:
           - NNPDE1
           - NNPDE2
           - AdaptiveLoss
-          - Logging
           - Forward
           - DGM
           - NNODE
diff --git a/.github/workflows/Tests.yml b/.github/workflows/Tests.yml
index b759a860b2..8c9178c683 100644
--- a/.github/workflows/Tests.yml
+++ b/.github/workflows/Tests.yml
@@ -34,7 +34,6 @@ jobs:
           - "NNPDE1"
           - "NNPDE2"
           - "AdaptiveLoss"
-          - "Logging"
           - "Forward"
           - "DGM"
           - "NNODE"
@@ -45,4 +44,5 @@ jobs:
       group: "${{ matrix.group }}"
       julia-version: "${{ matrix.version }}"
       coverage-directories: "src,ext"
+      julia-runtest-depwarn: "yes"  # TensorBoardLogger has a global depwarn
     secrets: "inherit"
diff --git a/Project.toml b/Project.toml
index 21b49693df..f060266905 100644
--- a/Project.toml
+++ b/Project.toml
@@ -54,7 +54,7 @@ NeuralPDETensorBoardLoggerExt = "TensorBoardLogger"
 ADTypes = "1.9.0"
 Adapt = "4"
 AdvancedHMC = "0.6.1"
-Aqua = "0.8"
+Aqua = "0.8.9"
 ArrayInterface = "7.11"
 CUDA = "5.5.2"
 ChainRulesCore = "1.24"
@@ -69,7 +69,9 @@ ExplicitImports = "1.10.1"
 Flux = "0.14.22"
 ForwardDiff = "0.10.36"
 Functors = "0.4.12"
+Hwloc = "3.3.0"
 Integrals = "4.5"
+InteractiveUtils = "<0.0.1, 1"
 IntervalSets = "0.7.10"
 LineSearches = "7.3"
 LinearAlgebra = "1.10"
@@ -88,14 +90,13 @@ Optimization = "4"
 OptimizationOptimJL = "0.4"
 OptimizationOptimisers = "0.3"
 OrdinaryDiffEq = "6.87"
-Pkg = "1.10"
 Printf = "1.10"
 QuasiMonteCarlo = "0.3.2"
 Random = "1"
+ReTestItems = "1.29.0"
 RecursiveArrayTools = "3.27.0"
 Reexport = "1.2"
 RuntimeGeneratedFunctions = "0.5.12"
-SafeTestsets = "0.1"
 SciMLBase = "2.56"
 Statistics = "1.10"
 StochasticDiffEq = "6.69.1"
@@ -114,6 +115,8 @@ CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 DiffEqNoiseProcess = "77a26b50-5914-5dd7-bc55-306e6241c503"
 ExplicitImports = "7d51a73a-1435-4ff3-83d9-f097790105c7"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
+Hwloc = "0e44f5e4-bd66-52a0-8798-143a42290a1d"
+InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 LineSearches = "d3d80556-e9d4-5f37-9878-2ab0fcc64255"
 LuxCUDA = "d0bbae9a-e099-4d5b-a835-1c6931763bda"
 LuxCore = "bb33d45b-7691-41d6-9220-0943567d0623"
@@ -121,11 +124,10 @@ LuxLib = "82251201-b29d-42c6-8e01-566dec8acb11"
 MethodOfLines = "94925ecb-adb7-4558-8ed8-f975c56a0bf4"
 OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e"
 OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
-Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
-SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
+ReTestItems = "817f1d60-ba6b-4fd5-9520-3cf149f6a823"
 StochasticDiffEq = "789caeaf-c7a9-5a7d-9973-96adeb23e2a0"
 TensorBoardLogger = "899adc3e-224a-11e9-021f-63837185c80f"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Aqua", "CUDA", "DiffEqNoiseProcess", "ExplicitImports", "Flux", "LineSearches", "LuxCUDA", "LuxCore", "LuxLib", "MethodOfLines", "OptimizationOptimJL", "OrdinaryDiffEq", "Pkg", "SafeTestsets", "StochasticDiffEq", "TensorBoardLogger", "Test"]
+test = ["Aqua", "CUDA", "DiffEqNoiseProcess", "ExplicitImports", "Flux", "Hwloc", "InteractiveUtils", "LineSearches", "LuxCUDA", "LuxCore", "LuxLib", "MethodOfLines", "OptimizationOptimJL", "OrdinaryDiffEq", "ReTestItems", "StochasticDiffEq", "TensorBoardLogger", "Test"]
diff --git a/docs/make.jl b/docs/make.jl
index 32f56b6229..2fb6532685 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -18,5 +18,4 @@ makedocs(sitename = "NeuralPDE.jl",
         canonical = "https://docs.sciml.ai/NeuralPDE/stable/"),
     pages = pages)
 
-deploydocs(repo = "github.com/SciML/NeuralPDE.jl.git";
-    push_preview = true)
+deploydocs(repo = "github.com/SciML/NeuralPDE.jl.git"; push_preview = true)
diff --git a/docs/src/index.md b/docs/src/index.md
index 8a6334fc63..0511445f85 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -17,7 +17,7 @@ networks which both approximate physical laws and real data simultaneously.
   - Specialized forms for solving `ODEProblem`s with neural networks.
   - Compatibility with [Flux.jl](https://fluxml.ai/) and [Lux.jl](https://lux.csail.mit.edu/).
     for all the GPU-powered machine learning layers available from those libraries.
-  - Compatibility with [NeuralOperators.jl](https://docs.sciml.ai/NeuralOperators/stable/) for
+  - Compatibility with [NeuralOperators.jl](https://github.com/SciML/NeuralOperators.jl) for
     mixing DeepONets and other neural operators (Fourier Neural Operators, Graph Neural Operators,
     etc.) with physics-informed loss functions.
 
diff --git a/docs/src/tutorials/derivative_neural_network.md b/docs/src/tutorials/derivative_neural_network.md
index bd26ce50fe..ac9721d1aa 100644
--- a/docs/src/tutorials/derivative_neural_network.md
+++ b/docs/src/tutorials/derivative_neural_network.md
@@ -52,9 +52,8 @@ We approximate the derivative of the neural network with another neural network
 using the second numeric derivative `Dt(Dtu1(t,x))`.
 
 ```@example derivativenn
-using NeuralPDE, Lux, ModelingToolkit
-using Optimization, OptimizationOptimisers, OptimizationOptimJL, LineSearches
-using Plots
+using NeuralPDE, Lux, ModelingToolkit, Optimization, OptimizationOptimisers,
+      OptimizationOptimJL, LineSearches, Plots
 using ModelingToolkit: Interval, infimum, supremum
 
 @parameters t, x
@@ -63,35 +62,40 @@ Dx = Differential(x)
 @variables u1(..), u2(..), u3(..)
 @variables Dxu1(..) Dtu1(..) Dxu2(..) Dtu2(..)
 
-eqs_ = [Dt(Dtu1(t, x)) ~ Dx(Dxu1(t, x)) + u3(t, x) * sin(pi * x),
-    Dt(Dtu2(t, x)) ~ Dx(Dxu2(t, x)) + u3(t, x) * cos(pi * x),
-    exp(-t) ~ u1(t, x) * sin(pi * x) + u2(t, x) * cos(pi * x)]
-
-bcs_ = [u1(0.0, x) ~ sin(pi * x),
-    u2(0.0, x) ~ cos(pi * x),
-    Dt(u1(0, x)) ~ -sin(pi * x),
-    Dt(u2(0, x)) ~ -cos(pi * x),
+eqs_ = [
+    Dt(Dtu1(t, x)) ~ Dx(Dxu1(t, x)) + u3(t, x) * sinpi(x),
+    Dt(Dtu2(t, x)) ~ Dx(Dxu2(t, x)) + u3(t, x) * cospi(x),
+    exp(-t) ~ u1(t, x) * sinpi(x) + u2(t, x) * cospi(x)
+]
+
+bcs_ = [
+    u1(0.0, x) ~ sinpi(x),
+    u2(0.0, x) ~ cospi(x),
+    Dt(u1(0, x)) ~ -sinpi(x),
+    Dt(u2(0, x)) ~ -cospi(x),
     u1(t, 0.0) ~ 0.0,
     u2(t, 0.0) ~ exp(-t),
     u1(t, 1.0) ~ 0.0,
-    u2(t, 1.0) ~ -exp(-t)]
+    u2(t, 1.0) ~ -exp(-t)
+]
 
-der_ = [Dt(u1(t, x)) ~ Dtu1(t, x),
+der_ = [
+    Dt(u1(t, x)) ~ Dtu1(t, x),
     Dt(u2(t, x)) ~ Dtu2(t, x),
     Dx(u1(t, x)) ~ Dxu1(t, x),
-    Dx(u2(t, x)) ~ Dxu2(t, x)]
+    Dx(u2(t, x)) ~ Dxu2(t, x)
+]
 
 bcs__ = [bcs_; der_]
 
 # Space and time domains
-domains = [t ∈ Interval(0.0, 1.0),
-    x ∈ Interval(0.0, 1.0)]
+domains = [t ∈ Interval(0.0, 1.0), x ∈ Interval(0.0, 1.0)]
 
 input_ = length(domains)
 n = 15
-chain = [Lux.Chain(Dense(input_, n, Lux.σ), Dense(n, n, Lux.σ), Dense(n, 1)) for _ in 1:7]
+chain = [Chain(Dense(input_, n, σ), Dense(n, n, σ), Dense(n, 1)) for _ in 1:7]
 
-training_strategy = QuadratureTraining(; batch = 200, reltol = 1e-6, abstol = 1e-6)
+training_strategy = StochasticTraining(128)
 discretization = PhysicsInformedNN(chain, training_strategy)
 
 vars = [u1(t, x), u2(t, x), u3(t, x), Dxu1(t, x), Dtu1(t, x), Dxu2(t, x), Dtu2(t, x)]
@@ -126,13 +130,13 @@ using Plots
 ts, xs = [infimum(d.domain):0.01:supremum(d.domain) for d in domains]
 minimizers_ = [res.u.depvar[sym_prob.depvars[i]] for i in 1:length(chain)]
 
-u1_real(t, x) = exp(-t) * sin(pi * x)
-u2_real(t, x) = exp(-t) * cos(pi * x)
+u1_real(t, x) = exp(-t) * sinpi(x)
+u2_real(t, x) = exp(-t) * cospi(x)
 u3_real(t, x) = (1 + pi^2) * exp(-t)
-Dxu1_real(t, x) = pi * exp(-t) * cos(pi * x)
-Dtu1_real(t, x) = -exp(-t) * sin(pi * x)
-Dxu2_real(t, x) = -pi * exp(-t) * sin(pi * x)
-Dtu2_real(t, x) = -exp(-t) * cos(pi * x)
+Dxu1_real(t, x) = pi * exp(-t) * cospi(x)
+Dtu1_real(t, x) = -exp(-t) * sinpi(x)
+Dxu2_real(t, x) = -pi * exp(-t) * sinpi(x)
+Dtu2_real(t, x) = -exp(-t) * cospi(x)
 
 function analytic_sol_func_all(t, x)
     [u1_real(t, x), u2_real(t, x), u3_real(t, x),
diff --git a/docs/src/tutorials/neural_adapter.md b/docs/src/tutorials/neural_adapter.md
index bcff48fa36..40914b6fab 100644
--- a/docs/src/tutorials/neural_adapter.md
+++ b/docs/src/tutorials/neural_adapter.md
@@ -24,43 +24,44 @@ Dxx = Differential(x)^2
 Dyy = Differential(y)^2
 
 # 2D PDE
-eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y)
+eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sinpi(x) * sinpi(y)
 
 # Initial and boundary conditions
-bcs = [u(0, y) ~ 0.0, u(1, y) ~ -sin(pi * 1) * sin(pi * y),
-    u(x, 0) ~ 0.0, u(x, 1) ~ -sin(pi * x) * sin(pi * 1)]
+bcs = [
+    u(0, y) ~ 0.0,
+    u(1, y) ~ -sinpi(1) * sinpi(y),
+    u(x, 0) ~ 0.0,
+    u(x, 1) ~ -sinpi(x) * sinpi(1)
+]
 # Space and time domains
 domains = [x ∈ Interval(0.0, 1.0), y ∈ Interval(0.0, 1.0)]
-quadrature_strategy = NeuralPDE.QuadratureTraining(reltol = 1e-3, abstol = 1e-6,
-    maxiters = 50, batch = 100)
+
+strategy = StochasticTraining(1024)
 inner = 8
-af = Lux.tanh
-chain1 = Lux.Chain(Lux.Dense(2, inner, af),
-    Lux.Dense(inner, inner, af),
-    Lux.Dense(inner, 1))
+af = tanh
+chain1 = Chain(Dense(2, inner, af), Dense(inner, inner, af), Dense(inner, 1))
 
-discretization = NeuralPDE.PhysicsInformedNN(chain1, quadrature_strategy)
+discretization = PhysicsInformedNN(chain1, strategy)
 
 @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
-prob = NeuralPDE.discretize(pde_system, discretization)
-sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization)
+prob = discretize(pde_system, discretization)
+sym_prob = symbolic_discretize(pde_system, discretization)
 
 callback = function (p, l)
     println("Current loss is: $l")
     return false
 end
 
-res = Optimization.solve(prob, OptimizationOptimisers.Adam(5e-3); maxiters = 10000)
+res = Optimization.solve(prob, OptimizationOptimisers.Adam(5e-3); maxiters = 10000,
+    callback)
 phi = discretization.phi
 
 inner_ = 8
-af = Lux.tanh
-chain2 = Lux.Chain(Dense(2, inner_, af),
-    Dense(inner_, inner_, af),
-    Dense(inner_, inner_, af),
+af = tanh
+chain2 = Chain(Dense(2, inner_, af), Dense(inner_, inner_, af), Dense(inner_, inner_, af),
     Dense(inner_, 1))
 initp, st = Lux.setup(Random.default_rng(), chain2)
-init_params2 = Float64.(ComponentArray(initp))
+init_params2 = ComponentArray{Float64}(initp)
 
 # the rule by which the training will take place is described here in loss function
 function loss(cord, θ)
@@ -69,15 +70,15 @@ function loss(cord, θ)
     ch2 .- phi(cord, res.u)
 end
 
-strategy = NeuralPDE.QuadratureTraining(; reltol = 1e-6, abstol = 1e-3)
+strategy = GridTraining(0.1)
 
-prob_ = NeuralPDE.neural_adapter(loss, init_params2, pde_system, strategy)
-res_ = Optimization.solve(prob_, OptimizationOptimisers.Adam(5e-3); maxiters = 10000)
+prob_ = neural_adapter(loss, init_params2, pde_system, strategy)
+res_ = solve(prob_, OptimizationOptimisers.Adam(5e-3); maxiters = 10000, callback)
 
 phi_ = PhysicsInformedNN(chain2, strategy; init_params = res_.u).phi
 
 xs, ys = [infimum(d.domain):0.01:supremum(d.domain) for d in domains]
-analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2)
+analytic_sol_func(x, y) = (sinpi(x) * sinpi(y)) / (2pi^2)
 
 u_predict = reshape([first(phi([x, y], res.u)) for x in xs for y in ys],
     (length(xs), length(ys)))
@@ -99,7 +100,7 @@ plot(p1, p2, p3, p4, p5)
 
 ## Domain decomposition
 
-In this example, we first obtain a prediction of 2D Poisson equation on subdomains. We split up full domain into 10 sub problems by x, and create separate neural networks for each sub interval. If x domain ∈ [x_0, x_end] so, it is decomposed on 10 part: sub x domains = {[x_0, x_1], ... [x_i,x_i+1], ..., x_9,x_end]}.
+In this example, we first obtain a prediction of 2D Poisson equation on subdomains. We split up full domain into 10 sub problems by x, and create separate neural networks for each sub interval. If x domain ∈ [x_0, x_end] so, it is decomposed on 4 part: sub x domains = {[x_0, x_1], ... [x_i,x_i+1], ..., x_3,x_end]}.
 And then using the method neural_adapter, we retrain the batch of 10 predictions to the one prediction for full domain of task.
 
 ![domain_decomposition](https://user-images.githubusercontent.com/12683885/127149752-a4ecea50-2984-45d8-b0d4-d2eadecf58e7.png)
@@ -113,10 +114,14 @@ using ModelingToolkit: Interval, infimum, supremum
 Dxx = Differential(x)^2
 Dyy = Differential(y)^2
 
-eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y)
+eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sinpi(x) * sinpi(y)
 
-bcs = [u(0, y) ~ 0.0, u(1, y) ~ -sin(pi * 1) * sin(pi * y),
-    u(x, 0) ~ 0.0, u(x, 1) ~ -sin(pi * x) * sin(pi * 1)]
+bcs = [
+    u(0, y) ~ 0.0,
+    u(1, y) ~ -sinpi(1) * sinpi(y),
+    u(x, 0) ~ 0.0,
+    u(x, 1) ~ -sinpi(x) * sinpi(1)
+]
 
 # Space
 x_0 = 0.0
@@ -125,38 +130,34 @@ x_domain = Interval(x_0, x_end)
 y_domain = Interval(0.0, 1.0)
 domains = [x ∈ x_domain, y ∈ y_domain]
 
-count_decomp = 10
+count_decomp = 4
 
 # Neural network
-af = Lux.tanh
+af = tanh
 inner = 10
-chains = [Lux.Chain(Dense(2, inner, af), Dense(inner, inner, af), Dense(inner, 1))
-          for _ in 1:count_decomp]
-init_params = map(
-    c -> Float64.(ComponentArray(Lux.setup(Random.default_rng(), c)[1])), chains)
+chain = Chain(Dense(2, inner, af), Dense(inner, inner, af), Dense(inner, 1))
 
 xs_ = infimum(x_domain):(1 / count_decomp):supremum(x_domain)
 xs_domain = [(xs_[i], xs_[i + 1]) for i in 1:(length(xs_) - 1)]
 domains_map = map(xs_domain) do (xs_dom)
     x_domain_ = Interval(xs_dom...)
-    domains_ = [x ∈ x_domain_,
-        y ∈ y_domain]
+    domains_ = [x ∈ x_domain_, y ∈ y_domain]
 end
 
-analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2)
+analytic_sol_func(x, y) = (sinpi(x) * sinpi(y)) / (2pi^2)
 function create_bcs(x_domain_, phi_bound)
     x_0, x_e = x_domain_.left, x_domain_.right
     if x_0 == 0.0
         bcs = [u(0, y) ~ 0.0,
             u(x_e, y) ~ analytic_sol_func(x_e, y),
             u(x, 0) ~ 0.0,
-            u(x, 1) ~ -sin(pi * x) * sin(pi * 1)]
+            u(x, 1) ~ -sinpi(x) * sinpi(1)]
         return bcs
     end
     bcs = [u(x_0, y) ~ phi_bound(x_0, y),
         u(x_e, y) ~ analytic_sol_func(x_e, y),
         u(x, 0) ~ 0.0,
-        u(x, 1) ~ -sin(pi * x) * sin(pi * 1)]
+        u(x, 1) ~ -sinpi(x) * sinpi(1)]
     bcs
 end
 
@@ -174,14 +175,13 @@ for i in 1:count_decomp
     bcs_ = create_bcs(domains_[1].domain, phi_bound)
     @named pde_system_ = PDESystem(eq, bcs_, domains_, [x, y], [u(x, y)])
     push!(pde_system_map, pde_system_)
-    strategy = NeuralPDE.QuadratureTraining(; reltol = 1e-6, abstol = 1e-3)
 
-    discretization = NeuralPDE.PhysicsInformedNN(chains[i], strategy;
-        init_params = init_params[i])
+    strategy = StochasticTraining(1024)
+    discretization = PhysicsInformedNN(chain, strategy)
 
-    prob = NeuralPDE.discretize(pde_system_, discretization)
-    symprob = NeuralPDE.symbolic_discretize(pde_system_, discretization)
-    res_ = Optimization.solve(prob, OptimizationOptimisers.Adam(5e-3); maxiters = 10000)
+    prob = discretize(pde_system_, discretization)
+    symprob = symbolic_discretize(pde_system_, discretization)
+    res_ = solve(prob, OptimizationOptimisers.Adam(5e-3); maxiters = 10000, callback)
     phi = discretization.phi
     push!(reses, res_)
     push!(phis, phi)
@@ -218,15 +218,12 @@ dx = 0.01
 u_predict, diff_u = compose_result(dx)
 
 inner_ = 18
-af = Lux.tanh
-chain2 = Lux.Chain(Dense(2, inner_, af),
-    Dense(inner_, inner_, af),
-    Dense(inner_, inner_, af),
-    Dense(inner_, inner_, af),
-    Dense(inner_, 1))
+af = tanh
+chain2 = Chain(Dense(2, inner_, af), Dense(inner_, inner_, af), Dense(inner_, inner_, af),
+    Dense(inner_, inner_, af), Dense(inner_, 1))
 
 initp, st = Lux.setup(Random.default_rng(), chain2)
-init_params2 = Float64.(ComponentArray(initp))
+init_params2 = ComponentArray{Float64}(initp)
 
 @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
 
@@ -243,12 +240,8 @@ callback = function (p, l)
     return false
 end
 
-prob_ = NeuralPDE.neural_adapter(losses, init_params2, pde_system_map,
-    NeuralPDE.QuadratureTraining(; reltol = 1e-6, abstol = 1e-3))
-res_ = Optimization.solve(prob_, OptimizationOptimisers.Adam(5e-3); maxiters = 5000)
-prob_ = NeuralPDE.neural_adapter(losses, res_.u, pde_system_map,
-    NeuralPDE.QuadratureTraining(; reltol = 1e-6, abstol = 1e-3))
-res_ = Optimization.solve(prob_, OptimizationOptimisers.Adam(5e-3); maxiters = 5000)
+prob_ = neural_adapter(losses, init_params2, pde_system_map, StochasticTraining(1024))
+res_ = solve(prob_, OptimizationOptimisers.Adam(5e-3); maxiters = 5000, callback)
 
 phi_ = PhysicsInformedNN(chain2, strategy; init_params = res_.u).phi
 
diff --git a/docs/src/tutorials/ode_parameter_estimation.md b/docs/src/tutorials/ode_parameter_estimation.md
index c8243e9587..784828ee7c 100644
--- a/docs/src/tutorials/ode_parameter_estimation.md
+++ b/docs/src/tutorials/ode_parameter_estimation.md
@@ -7,10 +7,7 @@ with Physics-Informed Neural Networks. Now we would consider the case where we w
 We start by defining the problem:
 
 ```@example param_estim_lv
-using NeuralPDE, OrdinaryDiffEq
-using Lux, Random
-using OptimizationOptimJL, LineSearches
-using Plots
+using NeuralPDE, OrdinaryDiffEq, Lux, Random, OptimizationOptimJL, LineSearches, Plots
 using Test # hide
 
 function lv(u, p, t)
@@ -42,21 +39,14 @@ Now, let's define a neural network for the PINN using [Lux.jl](https://lux.csail
 rng = Random.default_rng()
 Random.seed!(rng, 0)
 n = 15
-chain = Lux.Chain(
-    Lux.Dense(1, n, Lux.σ),
-    Lux.Dense(n, n, Lux.σ),
-    Lux.Dense(n, n, Lux.σ),
-    Lux.Dense(n, 2)
-)
-ps, st = Lux.setup(rng, chain) |> Lux.f64
+chain = Chain(Dense(1, n, σ), Dense(n, n, σ), Dense(n, n, σ), Dense(n, 2))
+ps, st = Lux.setup(rng, chain) |> f64
 ```
 
 Next we define an additional loss term to in the total loss which measures how the neural network's predictions is fitting the data.
 
 ```@example param_estim_lv
-function additional_loss(phi, θ)
-    return sum(abs2, phi(t_, θ) .- u_) / size(u_, 2)
-end
+additional_loss(phi, θ) = sum(abs2, phi(t_, θ) .- u_) / size(u_, 2)
 ```
 
 Next we define the optimizer and [`NNODE`](@ref) which is then plugged into the `solve` call.
@@ -64,14 +54,14 @@ Next we define the optimizer and [`NNODE`](@ref) which is then plugged into the
 ```@example param_estim_lv
 opt = LBFGS(linesearch = BackTracking())
 alg = NNODE(chain, opt, ps; strategy = WeightedIntervalTraining([0.7, 0.2, 0.1], 500),
-    param_estim = true, additional_loss = additional_loss)
+    param_estim = true, additional_loss)
 ```
 
 Now we have all the pieces to solve the optimization problem.
 
 ```@example param_estim_lv
 sol = solve(prob, alg, verbose = true, abstol = 1e-8, maxiters = 5000, saveat = t_)
-@test sol.k.u.p≈true_p rtol=1e-2 # hide
+@test sol.k.u.p≈true_p rtol=1e-2 norm=Base.Fix1(maximum, abs) # hide
 ```
 
 Let's plot the predictions from the PINN and compare it to the data.
diff --git a/docs/src/tutorials/param_estim.md b/docs/src/tutorials/param_estim.md
index db369ba932..52de7ede1a 100644
--- a/docs/src/tutorials/param_estim.md
+++ b/docs/src/tutorials/param_estim.md
@@ -79,7 +79,7 @@ three arguments:
   - the hyperparameters `p` .
 
 For a Lux neural network, the composed function will present itself as having θ as a
-[`ComponentArray`](https://docs.sciml.ai/ComponentArrays/stable/)
+[`ComponentArray`](https://github.com/jonniedie/ComponentArrays.jl)
 subsets `θ.x`, which can also be dereferenced like `θ[:x]`. Thus, the additional
 loss looks like:
 
diff --git a/docs/src/tutorials/pdesystem.md b/docs/src/tutorials/pdesystem.md
index 3fbfd55d92..e4156d9ef4 100644
--- a/docs/src/tutorials/pdesystem.md
+++ b/docs/src/tutorials/pdesystem.md
@@ -28,10 +28,8 @@ Using physics-informed neural networks.
 ## Copy-Pasteable Code
 
 ```@example poisson
-using NeuralPDE, Lux, Optimization, OptimizationOptimJL
-using LineSearches
+using NeuralPDE, Lux, Optimization, OptimizationOptimJL, LineSearches, Plots
 using ModelingToolkit: Interval
-using Plots
 
 @parameters x y
 @variables u(..)
@@ -42,15 +40,16 @@ Dyy = Differential(y)^2
 eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y)
 
 # Boundary conditions
-bcs = [u(0, y) ~ 0.0, u(1, y) ~ 0.0,
-    u(x, 0) ~ 0.0, u(x, 1) ~ 0.0]
+bcs = [
+    u(0, y) ~ 0.0, u(1, y) ~ 0.0,
+    u(x, 0) ~ 0.0, u(x, 1) ~ 0.0
+]
 # Space and time domains
-domains = [x ∈ Interval(0.0, 1.0),
-    y ∈ Interval(0.0, 1.0)]
+domains = [x ∈ Interval(0.0, 1.0), y ∈ Interval(0.0, 1.0)]
 
 # Neural network
 dim = 2 # number of dimensions
-chain = Lux.Chain(Dense(dim, 16, Lux.σ), Dense(16, 16, Lux.σ), Dense(16, 1))
+chain = Chain(Dense(dim, 16, σ), Dense(16, 16, σ), Dense(16, 1))
 
 # Discretization
 discretization = PhysicsInformedNN(
@@ -66,7 +65,7 @@ callback = function (p, l)
 end
 
 # Optimizer
-opt = OptimizationOptimJL.LBFGS(linesearch = BackTracking())
+opt = LBFGS(linesearch = BackTracking())
 res = solve(prob, opt, maxiters = 1000)
 phi = discretization.phi
 
@@ -116,7 +115,7 @@ Here, we define the neural network, where the input of NN equals the number of d
 ```@example poisson
 # Neural network
 dim = 2 # number of dimensions
-chain = Lux.Chain(Dense(dim, 16, Lux.σ), Dense(16, 16, Lux.σ), Dense(16, 1))
+chain = Chain(Dense(dim, 16, σ), Dense(16, 16, σ), Dense(16, 1))
 ```
 
 Here, we build PhysicsInformedNN algorithm where `dx` is the step of discretization where
diff --git a/docs/src/tutorials/systems.md b/docs/src/tutorials/systems.md
index fceaa68980..321efad2ae 100644
--- a/docs/src/tutorials/systems.md
+++ b/docs/src/tutorials/systems.md
@@ -35,7 +35,8 @@ with physics-informed neural networks.
 ## Solution
 
 ```@example system
-using NeuralPDE, Lux, ModelingToolkit, Optimization, OptimizationOptimJL, LineSearches
+using NeuralPDE, Lux, ModelingToolkit, Optimization, OptimizationOptimJL, LineSearches,
+      OptimizationOptimisers
 using ModelingToolkit: Interval, infimum, supremum
 
 @parameters t, x
@@ -45,29 +46,32 @@ Dtt = Differential(t)^2
 Dx = Differential(x)
 Dxx = Differential(x)^2
 
-eqs = [Dtt(u1(t, x)) ~ Dxx(u1(t, x)) + u3(t, x) * sin(pi * x),
-    Dtt(u2(t, x)) ~ Dxx(u2(t, x)) + u3(t, x) * cos(pi * x),
-    0.0 ~ u1(t, x) * sin(pi * x) + u2(t, x) * cos(pi * x) - exp(-t)]
-
-bcs = [u1(0, x) ~ sin(pi * x),
-    u2(0, x) ~ cos(pi * x),
-    Dt(u1(0, x)) ~ -sin(pi * x),
-    Dt(u2(0, x)) ~ -cos(pi * x),
+eqs = [
+    Dtt(u1(t, x)) ~ Dxx(u1(t, x)) + u3(t, x) * sinpi(x),
+    Dtt(u2(t, x)) ~ Dxx(u2(t, x)) + u3(t, x) * cospi(x),
+    0.0 ~ u1(t, x) * sinpi(x) + u2(t, x) * cospi(x) - exp(-t)
+]
+
+bcs = [
+    u1(0, x) ~ sinpi(x),
+    u2(0, x) ~ cospi(x),
+    Dt(u1(0, x)) ~ -sinpi(x),
+    Dt(u2(0, x)) ~ -cospi(x),
     u1(t, 0) ~ 0.0,
     u2(t, 0) ~ exp(-t),
     u1(t, 1) ~ 0.0,
-    u2(t, 1) ~ -exp(-t)]
+    u2(t, 1) ~ -exp(-t)
+]
 
 # Space and time domains
-domains = [t ∈ Interval(0.0, 1.0),
-    x ∈ Interval(0.0, 1.0)]
+domains = [t ∈ Interval(0.0, 1.0), x ∈ Interval(0.0, 1.0)]
 
 # Neural network
 input_ = length(domains)
 n = 15
-chain = [Lux.Chain(Dense(input_, n, Lux.σ), Dense(n, n, Lux.σ), Dense(n, 1)) for _ in 1:3]
+chain = [Chain(Dense(input_, n, σ), Dense(n, n, σ), Dense(n, 1)) for _ in 1:3]
 
-strategy = QuadratureTraining(; batch = 200, abstol = 1e-6, reltol = 1e-6)
+strategy = StochasticTraining(128)
 discretization = PhysicsInformedNN(chain, strategy)
 
 @named pdesystem = PDESystem(eqs, bcs, domains, [t, x], [u1(t, x), u2(t, x), u3(t, x)])
@@ -84,7 +88,9 @@ callback = function (p, l)
     return false
 end
 
-res = solve(prob, LBFGS(linesearch = BackTracking()); maxiters = 1000)
+res = solve(prob, OptimizationOptimisers.Adam(0.01); maxiters = 1000, callback)
+prob = remake(prob, u0 = res.u)
+res = solve(prob, LBFGS(linesearch = BackTracking()); maxiters = 200, callback)
 phi = discretization.phi
 ```
 
@@ -95,64 +101,19 @@ interface. Here is an example using the components from `symbolic_discretize` to
 reproduce the `discretize` optimization:
 
 ```@example system
-using NeuralPDE, Lux, ModelingToolkit, Optimization, OptimizationOptimJL, LineSearches
-import ModelingToolkit: Interval, infimum, supremum
-
-@parameters t, x
-@variables u1(..), u2(..), u3(..)
-Dt = Differential(t)
-Dtt = Differential(t)^2
-Dx = Differential(x)
-Dxx = Differential(x)^2
-
-eqs = [Dtt(u1(t, x)) ~ Dxx(u1(t, x)) + u3(t, x) * sin(pi * x),
-    Dtt(u2(t, x)) ~ Dxx(u2(t, x)) + u3(t, x) * cos(pi * x),
-    0.0 ~ u1(t, x) * sin(pi * x) + u2(t, x) * cos(pi * x) - exp(-t)]
-
-bcs = [u1(0, x) ~ sin(pi * x),
-    u2(0, x) ~ cos(pi * x),
-    Dt(u1(0, x)) ~ -sin(pi * x),
-    Dt(u2(0, x)) ~ -cos(pi * x),
-    u1(t, 0) ~ 0.0,
-    u2(t, 0) ~ exp(-t),
-    u1(t, 1) ~ 0.0,
-    u2(t, 1) ~ -exp(-t)]
-
-# Space and time domains
-domains = [t ∈ Interval(0.0, 1.0),
-    x ∈ Interval(0.0, 1.0)]
-
-# Neural network
-input_ = length(domains)
-n = 15
-chain = [Lux.Chain(Dense(input_, n, Lux.σ), Dense(n, n, Lux.σ), Dense(n, 1)) for _ in 1:3]
-@named pdesystem = PDESystem(eqs, bcs, domains, [t, x], [u1(t, x), u2(t, x), u3(t, x)])
-
-strategy = NeuralPDE.QuadratureTraining()
-discretization = PhysicsInformedNN(chain, strategy)
-sym_prob = NeuralPDE.symbolic_discretize(pdesystem, discretization)
-
 pde_loss_functions = sym_prob.loss_functions.pde_loss_functions
 bc_loss_functions = sym_prob.loss_functions.bc_loss_functions
 
-callback = function (p, l)
-    println("loss: ", l)
-    println("pde_losses: ", map(l_ -> l_(p.u), pde_loss_functions))
-    println("bcs_losses: ", map(l_ -> l_(p.u), bc_loss_functions))
-    return false
-end
-
 loss_functions = [pde_loss_functions; bc_loss_functions]
 
-function loss_function(θ, p)
-    sum(map(l -> l(θ), loss_functions))
-end
+loss_function(θ, _) = sum(l -> l(θ), loss_functions)
 
-f_ = OptimizationFunction(loss_function, Optimization.AutoZygote())
-prob = Optimization.OptimizationProblem(f_, sym_prob.flat_init_params)
+f_ = OptimizationFunction(loss_function, AutoZygote())
+prob = OptimizationProblem(f_, sym_prob.flat_init_params)
 
-res = Optimization.solve(
-    prob, OptimizationOptimJL.LBFGS(linesearch = BackTracking()); maxiters = 1000)
+res = solve(prob, OptimizationOptimisers.Adam(0.01); maxiters = 1000, callback)
+prob = remake(prob, u0 = res.u)
+res = solve(prob, LBFGS(linesearch = BackTracking()); maxiters = 200, callback)
 ```
 
 ## Solution Representation
@@ -168,10 +129,12 @@ ts, xs = [infimum(d.domain):0.01:supremum(d.domain) for d in domains]
 minimizers_ = [res.u.depvar[sym_prob.depvars[i]] for i in 1:3]
 
 function analytic_sol_func(t, x)
-    [exp(-t) * sin(pi * x), exp(-t) * cos(pi * x), (1 + pi^2) * exp(-t)]
+    [exp(-t) * sinpi(x), exp(-t) * cospi(x), (1 + pi^2) * exp(-t)]
 end
+
 u_real = [[analytic_sol_func(t, x)[i] for t in ts for x in xs] for i in 1:3]
 u_predict = [[phi[i]([t, x], minimizers_[i])[1] for t in ts for x in xs] for i in 1:3]
+
 diff_u = [abs.(u_real[i] .- u_predict[i]) for i in 1:3]
 ps = []
 for i in 1:3
@@ -196,7 +159,7 @@ ps[3]
 
 Notice here that the solution is represented in the `OptimizationSolution` with `u` as
 the parameters for the trained neural network. But, for the case where the neural network
-is from Lux.jl, it's given as a `ComponentArray` where `res.u.depvar.x` corresponds to the result
+is from jl, it's given as a `ComponentArray` where `res.u.depvar.x` corresponds to the result
 for the neural network corresponding to the dependent variable `x`, i.e. `res.u.depvar.u1`
 are the trained parameters for `phi[1]` in our example. For simpler indexing, you can use
 `res.u.depvar[:u1]` or `res.u.depvar[Symbol(:u,1)]` as shown here.
@@ -220,8 +183,7 @@ Dyy = Differential(y)^2
 bcs = [u[1](x, 0) ~ x, u[2](x, 0) ~ 2, u[3](x, 0) ~ 3, u[4](x, 0) ~ 4]
 
 # matrix PDE
-eqs = @. [(Dxx(u_(x, y)) + Dyy(u_(x, y))) for u_ in u] ~ -sin(pi * x) * sin(pi * y) *
-                                                         [0 1; 0 1]
+eqs = @. [(Dxx(u_(x, y)) + Dyy(u_(x, y))) for u_ in u] ~ -sinpi(x) * sinpi(y) * [0 1; 0 1]
 
 size(eqs)
 ```
diff --git a/src/ode_solve.jl b/src/ode_solve.jl
index fe6a770cd4..0b3ef34474 100644
--- a/src/ode_solve.jl
+++ b/src/ode_solve.jl
@@ -152,14 +152,11 @@ Computes u' using either forward-mode automatic differentiation or numerical dif
 """
 function ode_dfdx end
 
-function ode_dfdx(phi::ODEPhi{<:Number}, t::Number, θ, autodiff::Bool)
-    autodiff && return ForwardDiff.derivative(Base.Fix2(phi, θ), t)
-    ϵ = sqrt(eps(typeof(t)))
-    return (phi(t + ϵ, θ) - phi(t, θ)) / ϵ
-end
-
 function ode_dfdx(phi::ODEPhi, t, θ, autodiff::Bool)
-    autodiff && return ForwardDiff.jacobian(Base.Fix2(phi, θ), t)
+    if autodiff
+        t isa Number && return ForwardDiff.derivative(Base.Fix2(phi, θ), t)
+        return ForwardDiff.jacobian(Base.Fix2(phi, θ), t)
+    end
     ϵ = sqrt(eps(eltype(t)))
     return (phi(t .+ ϵ, θ) .- phi(t, θ)) ./ ϵ
 end
diff --git a/src/pinn_types.jl b/src/pinn_types.jl
index 15b426f0f1..6a7d617e9b 100644
--- a/src/pinn_types.jl
+++ b/src/pinn_types.jl
@@ -1,3 +1,9 @@
+"""
+    LogOptions(log_frequency)
+    LogOptions(; log_frequency = 50)
+
+Options for logging during optimization.
+"""
 struct LogOptions
     log_frequency::Int
     # TODO: add in an option for saving plots in the log. this is currently not done because the type of plot is dependent on the PDESystem
diff --git a/test/BPINN_PDE_tests.jl b/test/BPINN_PDE_tests.jl
index cbb8ffa46c..6a768533d4 100644
--- a/test/BPINN_PDE_tests.jl
+++ b/test/BPINN_PDE_tests.jl
@@ -1,22 +1,21 @@
-using Test, MCMCChains, Lux, ModelingToolkit, ForwardDiff, Distributions, OrdinaryDiffEq,
-      AdvancedHMC, Statistics, Random, Functors, NeuralPDE, MonteCarloMeasurements,
-      ComponentArrays
-import ModelingToolkit: Interval, infimum, supremum
-import Flux
+@testitem "BPINN PDE I: 2D Periodic System" tags=[:pdebpinn] begin
+    using MCMCChains, Lux, ModelingToolkit, Distributions, OrdinaryDiffEq,
+          AdvancedHMC, Statistics, Random, Functors, NeuralPDE, MonteCarloMeasurements,
+          ComponentArrays
+    import ModelingToolkit: Interval, infimum, supremum
 
-Random.seed!(100)
+    Random.seed!(100)
 
-@testset "Example 1: 2D Periodic System" begin
-    # Cos(pi*t) example
     @parameters t
     @variables u(..)
     Dt = Differential(t)
-    eqs = Dt(u(t)) - cos(2 * π * t) ~ 0
-    bcs = [u(0) ~ 0.0]
+    eq = Dt(u(t)) - cospi(2t) ~ 0
+    bcs = [u(0.0) ~ 0.0]
     domains = [t ∈ Interval(0.0, 2.0)]
+
     chainl = Chain(Dense(1, 6, tanh), Dense(6, 1))
     initl, st = Lux.setup(Random.default_rng(), chainl)
-    @named pde_system = PDESystem(eqs, bcs, domains, [t], [u(t)])
+    @named pde_system = PDESystem(eq, bcs, domains, [t], [u(t)])
 
     # non adaptive case
     discretization = BayesianPINN([chainl], GridTraining([0.01]))
@@ -25,28 +24,39 @@ Random.seed!(100)
         pde_system, discretization; draw_samples = 1500, bcstd = [0.02],
         phystd = [0.01], priorsNNw = (0.0, 1.0), saveats = [1 / 50.0])
 
-    analytic_sol_func(u0, t) = u0 + sin(2 * π * t) / (2 * π)
+    analytic_sol_func(u0, t) = u0 + sinpi(2t) / (2pi)
     ts = vec(sol1.timepoints[1])
     u_real = [analytic_sol_func(0.0, t) for t in ts]
     u_predict = pmean(sol1.ensemblesol[1])
+
     @test u_predict≈u_real atol=0.5
     @test mean(u_predict .- u_real) < 0.1
 end
 
-@testset "Example 2: 1D ODE" begin
+@testitem "BPINN PDE II: 1D ODE" tags=[:pdebpinn] begin
+    using MCMCChains, Lux, ModelingToolkit, Distributions, OrdinaryDiffEq,
+          AdvancedHMC, Statistics, Random, Functors, NeuralPDE, MonteCarloMeasurements,
+          ComponentArrays
+    import ModelingToolkit: Interval, infimum, supremum
+
+    Random.seed!(100)
+
     @parameters θ
     @variables u(..)
     Dθ = Differential(θ)
 
     # 1D ODE
-    eq = Dθ(u(θ)) ~ θ^3 + 2 * θ + (θ^2) * ((1 + 3 * (θ^2)) / (1 + θ + (θ^3))) -
-                    u(θ) * (θ + ((1 + 3 * (θ^2)) / (1 + θ + θ^3)))
+    eq = Dθ(u(θ)) ~ θ^3 + 2.0f0 * θ + (θ^2) * ((1.0f0 + 3 * (θ^2)) / (1.0f0 + θ + (θ^3))) -
+                    u(θ) * (θ + ((1.0f0 + 3.0f0 * (θ^2)) / (1.0f0 + θ + θ^3)))
 
     # Initial and boundary conditions
-    bcs = [u(0.0) ~ 1.0]
+    bcs = [u(0.0) ~ 1.0f0]
 
     # Space and time domains
-    domains = [θ ∈ Interval(0.0, 1.0)]
+    domains = [θ ∈ Interval(0.0f0, 1.0f0)]
+
+    # Discretization
+    dt = 0.1f0
 
     # Neural network
     chain = Chain(Dense(1, 12, σ), Dense(12, 1))
@@ -66,23 +76,32 @@ end
     @test u_predict≈u_real atol=0.8
 end
 
-@testset "Example 3: 3rd Degree ODE" begin
+@testitem "BPINN PDE III: 3rd Degree ODE" tags=[:pdebpinn] begin
+    using MCMCChains, Lux, ModelingToolkit, Distributions, OrdinaryDiffEq,
+          AdvancedHMC, Statistics, Random, Functors, NeuralPDE, MonteCarloMeasurements,
+          ComponentArrays
+    import ModelingToolkit: Interval, infimum, supremum
+
+    Random.seed!(100)
+
     @parameters x
     @variables u(..), Dxu(..), Dxxu(..), O1(..), O2(..)
     Dxxx = Differential(x)^3
     Dx = Differential(x)
 
     # ODE
-    eq = Dx(Dxxu(x)) ~ cos(pi * x)
+    eq = Dx(Dxxu(x)) ~ cospi(x)
 
     # Initial and boundary conditions
     ep = (cbrt(eps(eltype(Float64))))^2 / 6
 
-    bcs = [u(0.0) ~ 0.0,
-        u(1.0) ~ cos(pi),
+    bcs = [
+        u(0.0) ~ 0.0,
+        u(1.0) ~ cospi(1.0),
         Dxu(1.0) ~ 1.0,
         Dxu(x) ~ Dx(u(x)) + ep * O1(x),
-        Dxxu(x) ~ Dx(Dxu(x)) + ep * O2(x)]
+        Dxxu(x) ~ Dx(Dxu(x)) + ep * O2(x)
+    ]
 
     # Space and time domains
     domains = [x ∈ Interval(0.0, 1.0)]
@@ -105,7 +124,7 @@ end
         bcstd = [0.01, 0.01, 0.01, 0.01, 0.01], phystd = [0.005],
         priorsNNw = (0.0, 10.0), saveats = [1 / 100.0])
 
-    analytic_sol_func(x) = (π * x * (-x + (π^2) * (2 * x - 3) + 1) - sin(π * x)) / (π^3)
+    analytic_sol_func(x) = (π * x * (-x + (π^2) * (2 * x - 3) + 1) - sinpi(x)) / (π^3)
 
     u_predict = pmean(sol1.ensemblesol[1])
     xs = vec(sol1.timepoints[1])
@@ -113,7 +132,14 @@ end
     @test u_predict≈u_real atol=0.5
 end
 
-@testset "Example 4: 2D Poissons equation" begin
+@testitem "BPINN PDE IV: 2D Poisson" tags=[:pdebpinn] begin
+    using MCMCChains, Lux, ModelingToolkit, Distributions, OrdinaryDiffEq,
+          AdvancedHMC, Statistics, Random, Functors, NeuralPDE, MonteCarloMeasurements,
+          ComponentArrays
+    import ModelingToolkit: Interval, infimum, supremum
+
+    Random.seed!(100)
+
     @parameters x y
     @variables u(..)
     Dxx = Differential(x)^2
@@ -123,65 +149,205 @@ end
     eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y)
 
     # Boundary conditions
-    bcs = [u(0, y) ~ 0.0, u(1, y) ~ 0.0,
-        u(x, 0) ~ 0.0, u(x, 1) ~ 0.0]
+    bcs = [
+        u(0, y) ~ 0.0,
+        u(1, y) ~ 0.0,
+        u(x, 0) ~ 0.0,
+        u(x, 1) ~ 0.0
+    ]
 
     # Space and time domains
-    domains = [x ∈ Interval(0.0, 1.0),
-        y ∈ Interval(0.0, 1.0)]
+    domains = [x ∈ Interval(0.0, 1.0), y ∈ Interval(0.0, 1.0)]
+
+    # Discretization
+    dt = 0.1f0
 
     # Neural network
-    dim = 2 # number of dimensions
-    chain = Chain(Dense(dim, 9, σ), Dense(9, 9, σ), Dense(9, 1))
+    chain = Chain(Dense(2, 9, σ), Dense(9, 9, σ), Dense(9, 1))
 
-    # Discretization
     dx = 0.04
     discretization = BayesianPINN([chain], GridTraining(dx))
 
     @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
 
-    sol1 = ahmc_bayesian_pinn_pde(pde_system, discretization; draw_samples = 200,
+    sol = ahmc_bayesian_pinn_pde(pde_system, discretization; draw_samples = 200,
         bcstd = [0.003, 0.003, 0.003, 0.003], phystd = [0.003],
         priorsNNw = (0.0, 10.0), saveats = [1 / 100.0, 1 / 100.0])
 
-    xs = sol1.timepoints[1]
-    analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2)
+    xs = sol.timepoints[1]
+    analytic_sol_func(x, y) = (sinpi(x) * sinpi(y)) / (2pi^2)
 
-    u_predict = pmean(sol1.ensemblesol[1])
+    u_predict = pmean(sol.ensemblesol[1])
     u_real = [analytic_sol_func(xs[:, i][1], xs[:, i][2]) for i in 1:length(xs[1, :])]
-    @test u_predict≈u_real atol=1.5
+    @test u_predict≈u_real rtol=0.5
 end
 
-@testset "Translating from Flux" begin
+@testitem "BPINN PDE: Translating from Flux" tags=[:pdebpinn] begin
+    using MCMCChains, Lux, ModelingToolkit, Distributions, OrdinaryDiffEq,
+          AdvancedHMC, Statistics, Random, Functors, NeuralPDE, MonteCarloMeasurements,
+          ComponentArrays
+    import ModelingToolkit: Interval, infimum, supremum
+    import Flux
+
+    Random.seed!(100)
+
     @parameters θ
     @variables u(..)
     Dθ = Differential(θ)
 
     # 1D ODE
-    eq = Dθ(u(θ)) ~ θ^3 + 2 * θ + (θ^2) * ((1 + 3 * (θ^2)) / (1 + θ + (θ^3))) -
-                    u(θ) * (θ + ((1 + 3 * (θ^2)) / (1 + θ + θ^3)))
+    eq = Dθ(u(θ)) ~ θ^3 + 2.0f0 * θ + (θ^2) * ((1.0f0 + 3 * (θ^2)) / (1.0f0 + θ + (θ^3))) -
+                    u(θ) * (θ + ((1.0f0 + 3.0f0 * (θ^2)) / (1.0f0 + θ + θ^3)))
 
     # Initial and boundary conditions
-    bcs = [u(0.0) ~ 1.0]
+    bcs = [u(0.0) ~ 1.0f0]
 
     # Space and time domains
-    domains = [θ ∈ Interval(0.0, 1.0)]
+    domains = [θ ∈ Interval(0.0f0, 1.0f0)]
 
     # Neural network
     chain = Flux.Chain(Flux.Dense(1, 12, Flux.σ), Flux.Dense(12, 1))
 
     discretization = BayesianPINN([chain], GridTraining([0.01]))
-    @test discretization.chain[1] isa AbstractLuxLayer
+    @test discretization.chain[1] isa Lux.AbstractLuxLayer
 
     @named pde_system = PDESystem(eq, bcs, domains, [θ], [u])
 
-    sol1 = ahmc_bayesian_pinn_pde(
-        pde_system, discretization; draw_samples = 500, bcstd = [0.1],
-        phystd = [0.05], priorsNNw = (0.0, 10.0), saveats = [1 / 100.0])
+    sol = ahmc_bayesian_pinn_pde(pde_system, discretization; draw_samples = 500,
+        bcstd = [0.1], phystd = [0.05], priorsNNw = (0.0, 10.0), saveats = [1 / 100.0])
 
     analytic_sol_func(t) = exp(-(t^2) / 2) / (1 + t + t^3) + t^2
-    ts = sol1.timepoints[1]
+    ts = sol.timepoints[1]
     u_real = vec([analytic_sol_func(t) for t in ts])
-    u_predict = pmean(sol1.ensemblesol[1])
+    u_predict = pmean(sol.ensemblesol[1])
+
     @test u_predict≈u_real atol=0.8
 end
+
+@testitem "BPINN PDE Inv I: 1D Periodic System" tags=[:pdebpinn] begin
+    using MCMCChains, Lux, ModelingToolkit, Distributions, OrdinaryDiffEq,
+          AdvancedHMC, Statistics, Random, Functors, NeuralPDE, MonteCarloMeasurements,
+          ComponentArrays
+    import ModelingToolkit: Interval, infimum, supremum
+
+    Random.seed!(100)
+
+    @parameters t p
+    @variables u(..)
+
+    Dt = Differential(t)
+    eqs = Dt(u(t)) - cos(p * t) ~ 0
+    bcs = [u(0) ~ 0.0]
+    domains = [t ∈ Interval(0.0, 2.0)]
+
+    chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
+    initl, st = Lux.setup(Random.default_rng(), chainl)
+
+    @named pde_system = PDESystem(eqs,
+        bcs,
+        domains,
+        [t],
+        [u(t)],
+        [p],
+        defaults = Dict([p => 4.0]))
+
+    analytic_sol_func1(u0, t) = u0 + sinpi(2t) / (2π)
+    timepoints = collect(0.0:(1 / 100.0):2.0)
+    u = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
+    u = u .+ (u .* 0.2) .* randn(size(u))
+    dataset = [hcat(u, timepoints)]
+
+    @testset "$(nameof(typeof(strategy)))" for strategy in [
+        StochasticTraining(200),
+        QuasiRandomTraining(200),
+        GridTraining([0.02])
+    ]
+        discretization = BayesianPINN([chainl], strategy; param_estim = true,
+            dataset = [dataset, nothing])
+
+        sol1 = ahmc_bayesian_pinn_pde(pde_system,
+            discretization;
+            draw_samples = 1500,
+            bcstd = [0.05],
+            phystd = [0.01], l2std = [0.01],
+            priorsNNw = (0.0, 1.0),
+            saveats = [1 / 50.0],
+            param = [LogNormal(6.0, 0.5)])
+
+        param = 2 * π
+        ts = vec(sol1.timepoints[1])
+        u_real = [analytic_sol_func1(0.0, t) for t in ts]
+        u_predict = pmean(sol1.ensemblesol[1])
+
+        @test u_predict≈u_real atol=1.5
+        @test mean(u_predict .- u_real) < 0.1
+        @test sol1.estimated_de_params[1]≈param atol=param * 0.3
+    end
+end
+
+@testitem "BPINN PDE Inv II: Lorenz System" tags=[:pdebpinn] begin
+    using MCMCChains, Lux, ModelingToolkit, Distributions, OrdinaryDiffEq,
+          AdvancedHMC, Statistics, Random, Functors, NeuralPDE, MonteCarloMeasurements,
+          ComponentArrays
+    import ModelingToolkit: Interval, infimum, supremum
+
+    Random.seed!(100)
+
+    @parameters t, σ_
+    @variables x(..), y(..), z(..)
+    Dt = Differential(t)
+    eqs = [
+        Dt(x(t)) ~ σ_ * (y(t) - x(t)),
+        Dt(y(t)) ~ x(t) * (28.0 - z(t)) - y(t),
+        Dt(z(t)) ~ x(t) * y(t) - 8.0 / 3.0 * z(t)
+    ]
+
+    bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
+    domains = [t ∈ Interval(0.0, 1.0)]
+
+    input_ = length(domains)
+    n = 7
+    chain = [
+        Chain(Dense(input_, n, tanh), Dense(n, n, tanh), Dense(n, 1)),
+        Chain(Dense(input_, n, tanh), Dense(n, n, tanh), Dense(n, 1)),
+        Chain(Dense(input_, n, tanh), Dense(n, n, tanh), Dense(n, 1))
+    ]
+
+    # Generate Data
+    function lorenz!(du, u, p, t)
+        du[1] = 10.0 * (u[2] - u[1])
+        du[2] = u[1] * (28.0 - u[3]) - u[2]
+        du[3] = u[1] * u[2] - (8.0 / 3.0) * u[3]
+    end
+
+    u0 = [1.0; 0.0; 0.0]
+    tspan = (0.0, 1.0)
+    prob = ODEProblem(lorenz!, u0, tspan)
+    sol = solve(prob, Tsit5(), dt = 0.01, saveat = 0.05)
+    ts = sol.t
+    us = hcat(sol.u...)
+    us = us .+ ((0.05 .* randn(size(us))) .* us)
+    ts_ = hcat(sol(ts).t...)[1, :]
+    dataset = [hcat(us[i, :], ts_) for i in 1:3]
+
+    discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true,
+        dataset = [dataset, nothing])
+
+    @named pde_system = PDESystem(eqs, bcs, domains,
+        [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]]))
+
+    sol1 = ahmc_bayesian_pinn_pde(pde_system,
+        discretization;
+        draw_samples = 50,
+        bcstd = [0.3, 0.3, 0.3],
+        phystd = [0.1, 0.1, 0.1],
+        l2std = [1, 1, 1],
+        priorsNNw = (0.0, 1.0),
+        saveats = [0.01],
+        param = [Normal(12.0, 2)])
+
+    idealp = 10.0
+    p_ = sol1.estimated_de_params[1]
+    @test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
+    # @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
+end
diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
deleted file mode 100644
index fd64e177da..0000000000
--- a/test/BPINN_PDEinvsol_tests.jl
+++ /dev/null
@@ -1,143 +0,0 @@
-using Test, MCMCChains, Lux, ModelingToolkit, ForwardDiff, Distributions, OrdinaryDiffEq,
-      AdvancedHMC, Statistics, Random, Functors, NeuralPDE, MonteCarloMeasurements,
-      ComponentArrays
-import ModelingToolkit: Interval, infimum, supremum
-
-Random.seed!(100)
-
-@testset "Example 1: 1D Periodic System with parameter estimation" begin
-    # Cos(pi*t) periodic curve
-    @parameters t, p
-    @variables u(..)
-
-    Dt = Differential(t)
-    eqs = Dt(u(t)) - cos(p * t) ~ 0
-    bcs = [u(0) ~ 0.0]
-    domains = [t ∈ Interval(0.0, 2.0)]
-
-    chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
-    initl, st = Lux.setup(Random.default_rng(), chainl)
-
-    @named pde_system = PDESystem(eqs,
-        bcs,
-        domains,
-        [t],
-        [u(t)],
-        [p],
-        defaults = Dict([p => 4.0]))
-
-    analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
-    timepoints = collect(0.0:(1 / 100.0):2.0)
-    u = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
-    u = u .+ (u .* 0.2) .* randn(size(u))
-    dataset = [hcat(u, timepoints)]
-
-    # checking all training strategies
-    discretization = BayesianPINN([chainl], StochasticTraining(200), param_estim = true,
-        dataset = [dataset, nothing])
-
-    ahmc_bayesian_pinn_pde(pde_system,
-        discretization;
-        draw_samples = 1500,
-        bcstd = [0.05],
-        phystd = [0.01], l2std = [0.01],
-        priorsNNw = (0.0, 1.0),
-        saveats = [1 / 50.0],
-        param = [LogNormal(6.0, 0.5)])
-
-    discretization = BayesianPINN([chainl], QuasiRandomTraining(200), param_estim = true,
-        dataset = [dataset, nothing])
-
-    ahmc_bayesian_pinn_pde(pde_system,
-        discretization;
-        draw_samples = 1500,
-        bcstd = [0.05],
-        phystd = [0.01], l2std = [0.01],
-        priorsNNw = (0.0, 1.0),
-        saveats = [1 / 50.0],
-        param = [LogNormal(6.0, 0.5)])
-
-    # alternative to QuadratureTraining [WIP]
-
-    discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
-        dataset = [dataset, nothing])
-
-    sol1 = ahmc_bayesian_pinn_pde(pde_system,
-        discretization;
-        draw_samples = 1500,
-        bcstd = [0.05],
-        phystd = [0.01], l2std = [0.01],
-        priorsNNw = (0.0, 1.0),
-        saveats = [1 / 50.0],
-        param = [LogNormal(6.0, 0.5)])
-
-    param = 2 * π
-    ts = vec(sol1.timepoints[1])
-    u_real = [analytic_sol_func1(0.0, t) for t in ts]
-    u_predict = pmean(sol1.ensemblesol[1])
-
-    @test u_predict≈u_real atol=1.5
-    @test mean(u_predict .- u_real) < 0.1
-    @test sol1.estimated_de_params[1]≈param atol=param * 0.3
-end
-
-@testset "Example 2: Lorenz System with parameter estimation" begin
-    @parameters t, σ_
-    @variables x(..), y(..), z(..)
-    Dt = Differential(t)
-    eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
-        Dt(y(t)) ~ x(t) * (28.0 - z(t)) - y(t),
-        Dt(z(t)) ~ x(t) * y(t) - 8 / 3 * z(t)]
-
-    bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
-    domains = [t ∈ Interval(0.0, 1.0)]
-
-    input_ = length(domains)
-    n = 7
-    chain = [
-        Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-            Lux.Dense(n, 1)),
-        Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-            Lux.Dense(n, 1)),
-        Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-            Lux.Dense(n, 1))
-    ]
-
-    #Generate Data
-    function lorenz!(du, u, p, t)
-        du[1] = 10.0 * (u[2] - u[1])
-        du[2] = u[1] * (28.0 - u[3]) - u[2]
-        du[3] = u[1] * u[2] - (8 / 3) * u[3]
-    end
-
-    u0 = [1.0; 0.0; 0.0]
-    tspan = (0.0, 1.0)
-    prob = ODEProblem(lorenz!, u0, tspan)
-    sol = solve(prob, Tsit5(), dt = 0.01, saveat = 0.05)
-    ts = sol.t
-    us = hcat(sol.u...)
-    us = us .+ ((0.05 .* randn(size(us))) .* us)
-    ts_ = hcat(sol(ts).t...)[1, :]
-    dataset = [hcat(us[i, :], ts_) for i in 1:3]
-
-    discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true,
-        dataset = [dataset, nothing])
-
-    @named pde_system = PDESystem(eqs, bcs, domains,
-        [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]]))
-
-    sol1 = ahmc_bayesian_pinn_pde(pde_system,
-        discretization;
-        draw_samples = 50,
-        bcstd = [0.3, 0.3, 0.3],
-        phystd = [0.1, 0.1, 0.1],
-        l2std = [1, 1, 1],
-        priorsNNw = (0.0, 1.0),
-        saveats = [0.01],
-        param = [Normal(12.0, 2)])
-
-    idealp = 10.0
-    p_ = sol1.estimated_de_params[1]
-    @test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
-    # @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
-end
diff --git a/test/BPINN_Tests.jl b/test/BPINN_tests.jl
similarity index 89%
rename from test/BPINN_Tests.jl
rename to test/BPINN_tests.jl
index c011e8fe9b..7f1df5691a 100644
--- a/test/BPINN_Tests.jl
+++ b/test/BPINN_tests.jl
@@ -1,14 +1,10 @@
-using Test, MCMCChains, ForwardDiff, Distributions, OrdinaryDiffEq, OptimizationOptimisers,
-      AdvancedHMC, Lux, Statistics, Random, Functors, ComponentArrays, NeuralPDE,
-      MonteCarloMeasurements
-import Flux
+@testitem "BPINN ODE I: Without Param Estimation" tags=[:odebpinn] begin
+    using MCMCChains, Distributions, OrdinaryDiffEq, OptimizationOptimisers, Lux,
+          AdvancedHMC, Statistics, Random, Functors, ComponentArrays, MonteCarloMeasurements
+    import Flux
 
-# note that current testing bounds can be easily further tightened but have been inflated
-# for support for Julia build v1 on latest Julia version it performs much better for below
-# tests
-Random.seed!(100)
+    Random.seed!(100)
 
-@testset "Example 1 - without parameter estimation" begin
     linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π)
     linear = (u, p, t) -> cos(2 * π * t)
     tspan = (0.0, 2.0)
@@ -56,7 +52,13 @@ Random.seed!(100)
     @test mean(abs.(physsol0_1 .- pmean(sol1lux.ensemblesol[1]))) < 0.025
 end
 
-@testset "Example 2 - with parameter estimation" begin
+@testitem "BPINN ODE II: With Parameter Estimation" tags=[:odebpinn] begin
+    using MCMCChains, Distributions, OrdinaryDiffEq, OptimizationOptimisers, Lux,
+          AdvancedHMC, Statistics, Random, Functors, ComponentArrays, MonteCarloMeasurements
+    import Flux
+
+    Random.seed!(100)
+
     linear_analytic = (u0, p, t) -> u0 + sin(p * t) / (p)
     linear = (u, p, t) -> cos(p * t)
     tspan = (0.0, 2.0)
@@ -118,7 +120,13 @@ end
     @test abs(p - sol2lux.estimated_de_params[1]) < abs(0.15 * p)
 end
 
-@testset "Example 3" begin
+@testitem "BPINN ODE III" tags=[:odebpinn] begin
+    using MCMCChains, Distributions, OrdinaryDiffEq, OptimizationOptimisers, Lux,
+          AdvancedHMC, Statistics, Random, Functors, ComponentArrays, MonteCarloMeasurements
+    import Flux
+
+    Random.seed!(100)
+
     linear = (u, p, t) -> u / p + exp(t / p) * cos(t)
     tspan = (0.0, 10.0)
     u0 = 0.0
@@ -186,7 +194,13 @@ end
     @test abs(param1 - p) < abs(0.45 * p)
 end
 
-@testset "Translating from Flux" begin
+@testitem "BPINN ODE: Translating from Flux" tags=[:odebpinn] begin
+    using MCMCChains, Distributions, OrdinaryDiffEq, OptimizationOptimisers, Lux,
+          AdvancedHMC, Statistics, Random, Functors, ComponentArrays, MonteCarloMeasurements
+    import Flux
+
+    Random.seed!(100)
+
     linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π)
     linear = (u, p, t) -> cos(2 * π * t)
     tspan = (0.0, 2.0)
@@ -214,7 +228,13 @@ end
     @test alg.chain isa AbstractLuxLayer
 end
 
-@testset "Example 3 but with the new objective" begin
+@testitem "BPINN ODE III: with the new objective" tags=[:odebpinn] begin
+    using MCMCChains, Distributions, OrdinaryDiffEq, OptimizationOptimisers, Lux,
+          AdvancedHMC, Statistics, Random, Functors, ComponentArrays, MonteCarloMeasurements
+    import Flux
+
+    Random.seed!(100)
+
     linear = (u, p, t) -> u / p + exp(t / p) * cos(t)
     tspan = (0.0, 10.0)
     u0 = 0.0
@@ -289,7 +309,13 @@ end
     @test_broken abs(param3 - p) < abs(0.2 * p)
 end
 
-@testset "Example 4 - improvement" begin
+@testitem "BPINN ODE IV: Improvement" tags=[:odebpinn] begin
+    using MCMCChains, Distributions, OrdinaryDiffEq, OptimizationOptimisers, Lux,
+          AdvancedHMC, Statistics, Random, Functors, ComponentArrays, MonteCarloMeasurements
+    import Flux
+
+    Random.seed!(100)
+
     function lotka_volterra(u, p, t)
         # Model parameters.
         α, β, γ, δ = p
diff --git a/test/IDE_tests.jl b/test/IDE_tests.jl
index f0cdfd5d52..1335450d28 100644
--- a/test/IDE_tests.jl
+++ b/test/IDE_tests.jl
@@ -1,15 +1,23 @@
-using Test, NeuralPDE, Optimization, OptimizationOptimJL, DomainSets, Lux, Random,
-      Statistics
-import ModelingToolkit: Interval
+@testsetup module IntegroDiffTestSetup
 
-Random.seed!(110)
-
-callback = function (p, l)
-    println("Current loss is: $l")
+function callback(p, l)
+    if p.iter == 1 || p.iter % 10 == 0
+        println("Current loss is: $l after $(p.iter) iterations")
+    end
     return false
 end
 
-@testset "Example 1 - 1D" begin
+export callback
+
+end
+
+@testitem "IntegroDiff Example 1 -- 1D" tags=[:integrodiff] setup=[IntegroDiffTestSetup] begin
+    using Optimization, Optimisers, DomainSets, Lux, Random, Statistics
+    import ModelingToolkit: Interval, infimum, supremum
+    import OptimizationOptimJL: BFGS
+
+    Random.seed!(110)
+
     @parameters t
     @variables i(..)
     Di = Differential(t)
@@ -17,139 +25,190 @@ end
     eq = Di(i(t)) + 2 * i(t) + 5 * Ii(i(t)) ~ 1
     bcs = [i(0.0) ~ 0.0]
     domains = [t ∈ Interval(0.0, 2.0)]
+
     chain = Chain(Dense(1, 15, σ), Dense(15, 1))
-    strategy_ = GridTraining(0.1)
-    discretization = PhysicsInformedNN(chain, strategy_)
+    strategy = GridTraining(0.1)
+    discretization = PhysicsInformedNN(chain, strategy)
     @named pde_system = PDESystem(eq, bcs, domains, [t], [i(t)])
     prob = discretize(pde_system, discretization)
-    res = solve(prob, OptimizationOptimJL.BFGS(); callback = callback, maxiters = 100)
+    res = solve(prob, BFGS(); callback, maxiters = 100)
     ts = [infimum(d.domain):0.01:supremum(d.domain) for d in domains][1]
     phi = discretization.phi
     analytic_sol_func(t) = 1 / 2 * (exp(-t)) * (sin(2 * t))
+
     u_real = [analytic_sol_func(t) for t in ts]
     u_predict = [first(phi([t], res.u)) for t in ts]
     @test mean(abs2, u_real .- u_predict) < 0.01
 end
 
-@testset "Example 2 - 1D" begin
+@testitem "IntegroDiff Example 2 -- 1D" tags=[:integrodiff] setup=[IntegroDiffTestSetup] begin
+    using Optimization, Optimisers, DomainSets, Lux, Random, Statistics
+    import ModelingToolkit: Interval, infimum, supremum
+    import OptimizationOptimJL: BFGS
+
+    Random.seed!(110)
+
     @parameters x
     @variables u(..)
     Ix = Integral(x in DomainSets.ClosedInterval(0, x))
-    eq = Ix(u(x) * cos(x)) ~ (x^3) / 3
-    eq = Ix(u(x) * cos(x)) ~ (x^3) / 3
 
     eq = Ix(u(x) * cos(x)) ~ (x^3) / 3
-
     bcs = [u(0.0) ~ 0.0]
     domains = [x ∈ Interval(0.0, 1.00)]
+
     chain = Chain(Dense(1, 15, σ), Dense(15, 1))
-    strategy_ = GridTraining(0.1)
-    discretization = PhysicsInformedNN(chain, strategy_)
+    strategy = GridTraining(0.1)
+    discretization = PhysicsInformedNN(chain, strategy)
     @named pde_system = PDESystem(eq, bcs, domains, [x], [u(x)])
     prob = discretize(pde_system, discretization)
-    res = Optimization.solve(prob, OptimizationOptimJL.BFGS(); callback = callback,
-        maxiters = 200)
+    res = solve(prob, BFGS(); callback, maxiters = 100)
     xs = [infimum(d.domain):0.01:supremum(d.domain) for d in domains][1]
     phi = discretization.phi
-    u_predict = [first(phi([x], res.u)) for x in xs]
+
     u_real = [x^2 / cos(x) for x in xs]
+    u_predict = [first(phi([x], res.u)) for x in xs]
     @test mean(abs2, u_real .- u_predict) < 0.01
 end
 
-@testset "Example 3 - 2 Inputs, 1 Output" begin
+@testitem "IntegroDiff Example 3 -- 2 Inputs, 1 Output" tags=[:integrodiff] setup=[IntegroDiffTestSetup] begin
+    using Optimization, Optimisers, DomainSets, Lux, Random, Statistics
+    import ModelingToolkit: Interval, infimum, supremum
+    import OptimizationOptimJL: BFGS
+
+    Random.seed!(110)
+
     @parameters x, y
     @variables u(..)
     Dx = Differential(x)
     Dy = Differential(y)
     Ix = Integral((x, y) in DomainSets.UnitSquare())
+
     eq = Ix(u(x, y)) ~ 1 / 3
     bcs = [u(0.0, 0.0) ~ 1, Dx(u(x, y)) ~ -2 * x, Dy(u(x, y)) ~ -2 * y]
-    domains = [x ∈ Interval(0.0, 1.00), y ∈ Interval(0.0, 1.00)]
+    domains = [x ∈ Interval(0.0, 1.0), y ∈ Interval(0.0, 1.0)]
+
     chain = Chain(Dense(2, 15, σ), Dense(15, 1))
-    strategy_ = GridTraining(0.1)
-    discretization = PhysicsInformedNN(chain, strategy_)
+    strategy = GridTraining(0.1)
+    discretization = PhysicsInformedNN(chain, strategy)
     @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
     prob = discretize(pde_system, discretization)
-    res = solve(prob, OptimizationOptimJL.BFGS(); callback = callback, maxiters = 100)
-    xs = 0.00:0.01:1.00
-    ys = 0.00:0.01:1.00
+    res = solve(prob, BFGS(); callback, maxiters = 100)
     phi = discretization.phi
+
+    xs = 0.0:0.01:1.0
+    ys = 0.0:0.01:1.0
+
     u_real = collect(1 - x^2 - y^2 for y in ys, x in xs)
     u_predict = collect(Array(phi([x, y], res.u))[1] for y in ys, x in xs)
     @test mean(abs2, u_real .- u_predict) < 0.001
 end
 
-@testset "Example 4 - 2 Inputs, 1 Output" begin
+@testitem "IntegroDiff Example 4 -- 2 Inputs, 1 Output" tags=[:integrodiff] setup=[IntegroDiffTestSetup] begin
+    using Optimization, Optimisers, DomainSets, Lux, Random, Statistics
+    import ModelingToolkit: Interval, infimum, supremum
+    import OptimizationOptimJL: BFGS
+
+    Random.seed!(110)
+
     @parameters x, y
     @variables u(..)
     Dx = Differential(x)
     Dy = Differential(y)
     Ix = Integral((x, y) in DomainSets.ProductDomain(UnitInterval(), ClosedInterval(0, x)))
+
     eq = Ix(u(x, y)) ~ 5 / 12
     bcs = [u(0.0, 0.0) ~ 0, Dy(u(x, y)) ~ 2 * y, u(x, 0) ~ x]
-    domains = [x ∈ Interval(0.0, 1.00), y ∈ Interval(0.0, 1.00)]
+    domains = [x ∈ Interval(0.0, 1.0), y ∈ Interval(0.0, 1.0)]
+
     chain = Chain(Dense(2, 15, σ), Dense(15, 1))
-    strategy_ = GridTraining(0.1)
-    discretization = PhysicsInformedNN(chain, strategy_)
+    strategy = GridTraining(0.1)
+    discretization = PhysicsInformedNN(chain, strategy)
     @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
     prob = discretize(pde_system, discretization)
-    res = solve(prob, OptimizationOptimJL.BFGS(); callback = callback, maxiters = 100)
-    xs = 0.00:0.01:1.00
-    ys = 0.00:0.01:1.00
+    res = solve(prob, BFGS(); callback, maxiters = 100)
     phi = discretization.phi
+
+    xs = 0.0:0.01:1.0
+    ys = 0.0:0.01:1.0
+
     u_real = collect(x + y^2 for y in ys, x in xs)
     u_predict = collect(Array(phi([x, y], res.u))[1] for y in ys, x in xs)
     @test mean(abs2, u_real .- u_predict) < 0.01
 end
 
-@testset "Example 5 - 1 Input, 2 Outputs" begin
+@testitem "IntegroDiff Example 5 -- 1 Input, 2 Outputs" tags=[:integrodiff] setup=[IntegroDiffTestSetup] begin
+    using Optimization, Optimisers, DomainSets, Lux, Random, Statistics
+    import ModelingToolkit: Interval, infimum, supremum
+    import OptimizationOptimJL: BFGS
+
+    Random.seed!(110)
+
     @parameters x
     @variables u(..) w(..)
     Dx = Differential(x)
     Ix = Integral(x in DomainSets.ClosedInterval(1, x))
+
     eqs = [Ix(u(x) * w(x)) ~ log(abs(x)), Dx(w(x)) ~ -2 / (x^3), u(x) ~ x]
     bcs = [u(1.0) ~ 1.0, w(1.0) ~ 1.0]
     domains = [x ∈ Interval(1.0, 2.0)]
+
     chains = [Chain(Dense(1, 15, σ), Dense(15, 1)) for _ in 1:2]
-    strategy_ = GridTraining(0.1)
-    discretization = PhysicsInformedNN(chains, strategy_)
+    strategy = GridTraining(0.1)
+    discretization = PhysicsInformedNN(chains, strategy)
     @named pde_system = PDESystem(eqs, bcs, domains, [x], [u(x), w(x)])
     prob = discretize(pde_system, discretization)
-    res = solve(prob, OptimizationOptimJL.BFGS(); callback = callback, maxiters = 200)
+
+    res = solve(prob, BFGS(); callback, maxiters = 200)
     xs = [infimum(d.domain):0.01:supremum(d.domain) for d in domains][1]
     phi = discretization.phi
     u_predict = [(phi[1]([x], res.u.depvar.u))[1] for x in xs]
     w_predict = [(phi[2]([x], res.u.depvar.w))[1] for x in xs]
     u_real = [x for x in xs]
     w_real = [1 / x^2 for x in xs]
+
     @test mean(abs2, u_real .- u_predict) < 0.001
     @test mean(abs2, w_real .- w_predict) < 0.001
 end
 
-@testset "Example 6: Infinity" begin
+@testitem "IntegroDiff Example 6: Infinity" tags=[:integrodiff] setup=[IntegroDiffTestSetup] begin
+    using Optimization, Optimisers, DomainSets, Lux, Random, Statistics
+    import ModelingToolkit: Interval, infimum, supremum
+    import OptimizationOptimJL: BFGS
+
+    Random.seed!(110)
+
     @parameters x
     @variables u(..)
     I = Integral(x in ClosedInterval(1, x))
     Iinf = Integral(x in ClosedInterval(1, Inf))
+
     eqs = [I(u(x)) ~ Iinf(u(x)) - 1 / x]
     bcs = [u(1) ~ 1]
     domains = [x ∈ Interval(1.0, 2.0)]
+
     chain = Chain(Dense(1, 10, σ), Dense(10, 1))
     discretization = PhysicsInformedNN(chain, NeuralPDE.GridTraining(0.1))
     @named pde_system = PDESystem(eqs, bcs, domains, [x], [u(x)])
     prob = discretize(pde_system, discretization)
-    res = solve(prob, OptimizationOptimJL.BFGS(); callback = callback, maxiters = 200)
+    res = solve(prob, BFGS(); callback, maxiters = 200)
     xs = [infimum(d.domain):0.01:supremum(d.domain) for d in domains][1]
     phi = discretization.phi
     u_predict = [first(phi([x], res.u)) for x in xs]
     u_real = [1 / x^2 for x in xs]
-    @test u_real≈u_predict rtol=10^-1
+    @test u_real≈u_predict rtol=0.1
 end
 
-@testset "Example 7: Infinity" begin
+@testitem "IntegroDiff Example 7: Infinity" tags=[:integrodiff] setup=[IntegroDiffTestSetup] begin
+    using Optimization, Optimisers, DomainSets, Lux, Random, Statistics
+    import ModelingToolkit: Interval, infimum, supremum
+    import OptimizationOptimJL: BFGS
+
+    Random.seed!(110)
+
     @parameters x
     @variables u(..)
     I = Integral(x in ClosedInterval(x, Inf))
+
     eq = I(u(x)) ~ 1 / x
     domains = [x ∈ Interval(1.0, 2.0)]
     bcs = [u(1) ~ 1]
@@ -157,10 +216,10 @@ end
     discretization = PhysicsInformedNN(chain, GridTraining(0.1))
     @named pde_system = PDESystem(eq, bcs, domains, [x], [u(x)])
     prob = discretize(pde_system, discretization)
-    res = solve(prob, OptimizationOptimJL.BFGS(); callback = callback, maxiters = 300)
+    res = solve(prob, BFGS(); callback, maxiters = 300)
     xs = [infimum(d.domain):0.01:supremum(d.domain) for d in domains][1]
     phi = discretization.phi
     u_predict = [first(phi([x], res.u)) for x in xs]
     u_real = [1 / x^2 for x in xs]
-    @test u_real≈u_predict rtol=10^-2
+    @test u_real≈u_predict rtol=0.01
 end
diff --git a/test/NNDAE_tests.jl b/test/NNDAE_tests.jl
index cc36fd09e8..003cdae547 100644
--- a/test/NNDAE_tests.jl
+++ b/test/NNDAE_tests.jl
@@ -1,18 +1,18 @@
-using Test, Random, NeuralPDE, OrdinaryDiffEq, Statistics, Lux, Optimisers,
-      OptimizationOptimJL, Optimisers
+@testitem "DAE Case I" tags=[:nnode] begin
+    using Random, OrdinaryDiffEq, Statistics, Lux, Optimisers
 
-Random.seed!(100)
+    Random.seed!(100)
 
-@testset "Example 1" begin
     function example1(du, u, p, t)
         du[1] = cos(2pi * t)
         du[2] = u[2] + cos(2pi * t)
         nothing
     end
+
     u₀ = [1.0, -1.0]
     du₀ = [0.0, 0.0]
-    M = [1.0 0
-         0 0]
+    M = [1.0 0.0
+         0.0 0.0]
     f = ODEFunction(example1, mass_matrix = M)
     tspan = (0.0f0, 1.0f0)
 
@@ -22,21 +22,27 @@ Random.seed!(100)
     example = (du, u, p, t) -> [cos(2pi * t) - du[1], u[2] + cos(2pi * t) - du[2]]
     prob = DAEProblem(example, du₀, u₀, tspan; differential_vars = [true, false])
     chain = Chain(Dense(1, 15, cos), Dense(15, 15, sin), Dense(15, 2))
-    alg = NNDAE(chain, Optimisers.Adam(0.01); autodiff = false)
+    alg = NNDAE(chain, Adam(0.01); autodiff = false)
 
     sol = solve(
-        prob, alg, verbose = false, dt = 1 / 100.0f0, maxiters = 3000, abstol = 1.0f-10)
+        prob, alg; verbose = false, dt = 1 / 100.0f0, maxiters = 3000, abstol = 1.0f-10)
+
     @test ground_sol(0:(1 / 100):1)≈sol atol=0.4
 end
 
-@testset "Example 2" begin
+@testitem "DAE Case II" tags=[:nnode] begin
+    using Random, OrdinaryDiffEq, Statistics, Lux, Optimisers
+
+    Random.seed!(100)
+
     function example2(du, u, p, t)
         du[1] = u[1] - t
         du[2] = u[2] - t
         nothing
     end
-    M = [0.0 0
-         0 1]
+
+    M = [0.0 0.0
+         0.0 1.0]
     u₀ = [0.0, 0.0]
     du₀ = [0.0, 0.0]
     tspan = (0.0f0, pi / 2.0f0)
@@ -48,7 +54,7 @@ end
     differential_vars = [false, true]
     prob = DAEProblem(example, du₀, u₀, tspan; differential_vars = differential_vars)
     chain = Chain(Dense(1, 15, σ), Dense(15, 2))
-    alg = NNDAE(chain, Optimisers.Adam(0.1); autodiff = false)
+    alg = NNDAE(chain, Adam(0.1); autodiff = false)
 
     sol = solve(prob,
         alg, verbose = false, dt = 1 / 100.0f0, maxiters = 3000, abstol = 1.0f-10)
diff --git a/test/NNODE_tests.jl b/test/NNODE_tests.jl
index 96fc17a194..59a185dd98 100644
--- a/test/NNODE_tests.jl
+++ b/test/NNODE_tests.jl
@@ -1,190 +1,185 @@
-using Test, Random, NeuralPDE, OrdinaryDiffEq, Statistics, Lux, OptimizationOptimisers,
-      OptimizationOptimJL, WeightInitializers, LineSearches
-import Flux
+# using Test, Random, NeuralPDE, OrdinaryDiffEq, Statistics, Lux, OptimizationOptimisers,
+#       OptimizationOptimJL, WeightInitializers, LineSearches
+# import Flux
 
-rng = Random.default_rng()
-Random.seed!(100)
+# rng = Random.default_rng()
+# Random.seed!(100)
 
-@testset "Scalar" begin
-    linear = (u, p, t) -> cos(2pi * t)
+@testitem "Scalar" tags=[:nnode] begin
+    using OrdinaryDiffEq, Random, Lux, Optimisers
+    using OptimizationOptimJL: BFGS
+
+    Random.seed!(100)
+
+    linear = (u, p, t) -> cospi(2t)
     tspan = (0.0f0, 1.0f0)
     u0 = 0.0f0
     prob = ODEProblem(linear, u0, tspan)
     luxchain = Chain(Dense(1, 5, σ), Dense(5, 1))
-    opt = OptimizationOptimisers.Adam(0.1, (0.9, 0.95))
 
-    sol = solve(prob, NNODE(luxchain, opt), dt = 1 / 20.0f0, verbose = false,
-        abstol = 1.0f-10, maxiters = 200)
+    @testset "$(nameof(typeof(opt))) -- $(autodiff)" for opt in [BFGS(), Adam(0.1)],
+        autodiff in [false, true]
 
-    @test_throws ArgumentError solve(prob, NNODE(luxchain, opt; autodiff = true),
-        dt = 1 / 20.0f0, verbose = false, abstol = 1.0f-10, maxiters = 200)
+        if autodiff
+            @test_throws ArgumentError solve(
+                prob, NNODE(luxchain, opt; autodiff); maxiters = 200, dt = 1 / 20.0f0)
+            continue
+        end
 
-    sol = solve(prob, NNODE(luxchain, opt), verbose = false,
-        abstol = 1.0f-6, maxiters = 200)
+        @testset for (dt, abstol) in [(1 / 20.0f0, 1e-10), (nothing, 1e-6)]
+            kwargs = (; verbose = false, dt, abstol, maxiters = 200)
+            sol = solve(prob, NNODE(luxchain, opt; autodiff); kwargs...)
+        end
+    end
+end
 
-    opt = OptimizationOptimJL.BFGS()
-    sol = solve(prob, NNODE(luxchain, opt), dt = 1 / 20.0f0, verbose = false,
-        abstol = 1.0f-10, maxiters = 200)
+@testitem "Vector" tags=[:nnode] begin
+    using OrdinaryDiffEq, Random, Lux, Optimisers
+    using OptimizationOptimJL: BFGS
 
-    sol = solve(prob, NNODE(luxchain, opt), verbose = false,
-        abstol = 1.0f-6, maxiters = 200)
-end
+    Random.seed!(100)
 
-@testset "Vector" begin
-    linear = (u, p, t) -> [cos(2pi * t)]
+    linear = (u, p, t) -> [cospi(2t)]
     tspan = (0.0f0, 1.0f0)
     u0 = [0.0f0]
     prob = ODEProblem(linear, u0, tspan)
     luxchain = Chain(Dense(1, 5, σ), Dense(5, 1))
 
-    opt = OptimizationOptimJL.BFGS()
-    sol = solve(prob, NNODE(luxchain, opt), dt = 1 / 20.0f0, abstol = 1e-10,
-        verbose = false, maxiters = 200)
+    @testset "$(nameof(typeof(opt))) -- $(autodiff)" for opt in [BFGS(), Adam(0.1)],
+        autodiff in [false, true]
 
-    @test_throws ArgumentError solve(prob, NNODE(luxchain, opt; autodiff = true),
-        dt = 1 / 20.0f0, abstol = 1e-10, verbose = false, maxiters = 200)
+        sol = solve(
+            prob, NNODE(luxchain, opt); verbose = false, maxiters = 200, abstol = 1e-6)
 
-    sol = solve(prob, NNODE(luxchain, opt), abstol = 1.0f-6,
-        verbose = false, maxiters = 200)
-
-    @test sol(0.5) isa Vector
-    @test sol(0.5; idxs = 1) isa Number
-    @test sol.k isa SciMLBase.OptimizationSolution
+        @test sol(0.5) isa Vector
+        @test sol(0.5; idxs = 1) isa Number
+        @test sol.k isa SciMLBase.OptimizationSolution
+    end
 end
 
-@testset "Example 1" begin
+@testitem "ODE I" tags=[:nnode] begin
+    using OrdinaryDiffEq, Random, Lux, Optimisers
+
     linear = (u, p, t) -> @. t^3 + 2 * t + (t^2) * ((1 + 3 * (t^2)) / (1 + t + (t^3))) -
                              u * (t + ((1 + 3 * (t^2)) / (1 + t + t^3)))
     linear_analytic = (u0, p, t) -> [exp(-(t^2) / 2) / (1 + t + t^3) + t^2]
     prob = ODEProblem(
-        ODEFunction(linear, analytic = linear_analytic), [1.0f0], (0.0f0, 1.0f0))
+        ODEFunction(linear; analytic = linear_analytic), [1.0f0], (0.0f0, 1.0f0))
     luxchain = Chain(Dense(1, 128, σ), Dense(128, 1))
-    opt = OptimizationOptimisers.Adam(0.01)
-
-    sol = solve(prob, NNODE(luxchain, opt), verbose = false, maxiters = 400)
-    @test sol.errors[:l2] < 0.5
-
-    sol = solve(
-        prob, NNODE(luxchain, opt; batch = false, strategy = StochasticTraining(100)),
-        verbose = false, maxiters = 400)
-    @test sol.errors[:l2] < 0.5
+    opt = Adam(0.01)
 
-    sol = solve(
-        prob, NNODE(luxchain, opt; batch = true, strategy = StochasticTraining(100)),
-        verbose = false, maxiters = 400)
-    @test sol.errors[:l2] < 0.5
+    @testset for strategy in [nothing, StochasticTraining(100)], batch in [false, true]
+        sol = solve(
+            prob, NNODE(luxchain, opt; batch, strategy); verbose = false, maxiters = 200,
+            abstol = 1e-6)
+        @test sol.errors[:l2] < 0.5
+    end
+end
 
-    sol = solve(prob, NNODE(luxchain, opt; batch = false), verbose = false,
-        maxiters = 400, dt = 1 / 5.0f0)
-    @test sol.errors[:l2] < 0.5
+@testitem "ODE Example 2" tags=[:nnode] begin
+    using OrdinaryDiffEq, Random, Lux, Optimisers
 
-    sol = solve(prob, NNODE(luxchain, opt; batch = true),
-        verbose = false, maxiters = 400, dt = 1 / 5.0f0)
-    @test sol.errors[:l2] < 0.5
-end
+    Random.seed!(100)
 
-@testset "Example 2" begin
     linear = (u, p, t) -> -u / 5 + exp(-t / 5) .* cos(t)
     linear_analytic = (u0, p, t) -> exp(-t / 5) * (u0 + sin(t))
     prob = ODEProblem(
-        ODEFunction(linear, analytic = linear_analytic), 0.0f0, (0.0f0, 1.0f0))
+        ODEFunction(linear; analytic = linear_analytic), 0.0f0, (0.0f0, 1.0f0))
     luxchain = Chain(Dense(1, 5, σ), Dense(5, 1))
 
-    @testset for batch in (true, false), strategy in (StochasticTraining(100), nothing)
-        opt = OptimizationOptimisers.Adam(0.1)
-        sol = solve(prob, NNODE(luxchain, opt; batch, strategy),
-            verbose = false, maxiters = 400, abstol = 1.0f-8)
+    @testset for batch in [false, true], strategy in [StochasticTraining(100), nothing]
+        opt = Adam(0.1)
+        sol = solve(
+            prob, NNODE(luxchain, opt; batch, strategy); verbose = false, maxiters = 200,
+            abstol = 1e-6)
         @test sol.errors[:l2] < 0.5
     end
 end
 
-@testset "Example 3" begin
-    linear = (u, p, t) -> [cos(2pi * t), sin(2pi * t)]
+@testitem "ODE Example 3" tags=[:nnode] begin
+    using OrdinaryDiffEq, Random, Lux, Optimisers
+
+    Random.seed!(100)
+
+    linear = (u, p, t) -> [cospi(2t), sinpi(2t)]
     tspan = (0.0f0, 1.0f0)
     u0 = [0.0f0, -1.0f0 / 2pi]
-    linear_analytic = (u0, p, t) -> [sin(2pi * t) / 2pi, -cos(2pi * t) / 2pi]
-    odefunction = ODEFunction(linear, analytic = linear_analytic)
+    linear_analytic = (u0, p, t) -> [sinpi(2t) / 2pi, -cospi(2t) / 2pi]
+    odefunction = ODEFunction(linear; analytic = linear_analytic)
     prob = ODEProblem(odefunction, u0, tspan)
     luxchain = Chain(Dense(1, 10, σ), Dense(10, 2))
-    opt = OptimizationOptimisers.Adam(0.1)
+    opt = Adam(0.1)
     alg = NNODE(luxchain, opt; autodiff = false)
 
     sol = solve(
-        prob, alg, verbose = false, dt = 1 / 40.0f0, maxiters = 2000, abstol = 1.0f-7)
+        prob, alg; verbose = false, maxiters = 1000, abstol = 1e-6, saveat = 0.01)
+
     @test sol.errors[:l2] < 0.5
 end
 
-@testset "Training Strategies" begin
-    @testset "WeightedIntervalTraining" begin
-        function f(u, p, t)
-            [p[1] * u[1] - p[2] * u[1] * u[2], -p[3] * u[2] + p[4] * u[1] * u[2]]
-        end
-        p = [1.5, 1.0, 3.0, 1.0]
-        u0 = [1.0, 1.0]
-        prob_oop = ODEProblem{false}(f, u0, (0.0, 3.0), p)
-        true_sol = solve(prob_oop, Tsit5(), saveat = 0.01)
-
-        N = 64
-        chain = Chain(
-            Dense(1, N, gelu),
-            Dense(N, N, gelu),
-            Dense(N, N, gelu),
-            Dense(N, N, gelu),
-            Dense(N, length(u0))
-        )
-        opt = OptimizationOptimisers.Adam(0.001)
-        weights = [0.7, 0.2, 0.1]
-        points = 200
-        alg = NNODE(chain, opt, autodiff = false,
-            strategy = WeightedIntervalTraining(weights, points))
-        sol = solve(prob_oop, alg; verbose = false, maxiters = 5000, saveat = 0.01)
-        @test abs(mean(sol) - mean(true_sol)) < 0.2
+@testitem "Training Strategy: WeightedIntervalTraining" tags=[:nnode] begin
+    using OrdinaryDiffEq, Random, Lux, Optimisers, Statistics
+
+    Random.seed!(100)
+
+    function f(u, p, t)
+        [p[1] * u[1] - p[2] * u[1] * u[2], -p[3] * u[2] + p[4] * u[1] * u[2]]
     end
+    p = [1.5, 1.0, 3.0, 1.0]
+    u0 = [1.0, 1.0]
+    prob_oop = ODEProblem{false}(f, u0, (0.0, 3.0), p)
+    true_sol = solve(prob_oop, Tsit5(); saveat = 0.01)
+
+    N = 64
+    chain = Chain(Dense(1, N, gelu), Dense(N, N, gelu), Dense(N, N, gelu),
+        Dense(N, N, gelu), Dense(N, length(u0)))
+
+    alg = NNODE(
+        chain, Adam(0.01); strategy = WeightedIntervalTraining([0.7, 0.2, 0.1], 200))
+
+    sol = solve(prob_oop, alg; verbose = false, maxiters = 5000, saveat = 0.01)
+    @test abs(mean(sol) - mean(true_sol)) < 0.2
+end
+
+@testitem "Training Strategy: Others" tags=[:nnode] begin
+    using OrdinaryDiffEq, Random, Lux, Optimisers
 
-    linear = (u, p, t) -> cos(2pi * t)
-    linear_analytic = (u, p, t) -> (1 / (2pi)) * sin(2pi * t)
+    Random.seed!(100)
+
+    linear = (u, p, t) -> cospi(2t)
+    linear_analytic = (u, p, t) -> (1 / (2pi)) * sinpi(2t)
     tspan = (0.0, 1.0)
     dt = (tspan[2] - tspan[1]) / 99
     ts = collect(tspan[1]:dt:tspan[2])
-    prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), 0.0, (0.0, 1.0))
-    opt = OptimizationOptimisers.Adam(0.1, (0.9, 0.95))
-    u_analytical(x) = (1 / (2pi)) .* sin.(2pi .* x)
-
-    @testset "GridTraining" begin
-        luxchain = Chain(Dense(1, 5, σ), Dense(5, 1))
-        (u_, t_) = (u_analytical(ts), ts)
-        function additional_loss(phi, θ)
-            return sum(sum(abs2, [phi(t, θ) for t in t_] .- u_)) / length(u_)
-        end
-        alg1 = NNODE(luxchain, opt; strategy = GridTraining(0.01), additional_loss)
-        sol1 = solve(prob, alg1; verbose = false, abstol = 1e-8, maxiters = 500)
-        @test sol1.errors[:l2] < 0.5
-    end
+    prob = ODEProblem(ODEFunction(linear; analytic = linear_analytic), 0.0, (0.0, 1.0))
+    opt = Adam(0.1, (0.9, 0.95))
+    u_analytical(x) = (1 / (2pi)) .* sinpi.(2x)
 
-    @testset "QuadratureTraining" begin
-        luxchain = Chain(Dense(1, 5, σ), Dense(5, 1))
-        (u_, t_) = (u_analytical(ts), ts)
-        function additional_loss(phi, θ)
-            return sum(sum(abs2, [phi(t, θ) for t in t_] .- u_)) / length(u_)
-        end
-        alg1 = NNODE(luxchain, opt; additional_loss)
-        sol1 = solve(prob, alg1; verbose = false, abstol = 1e-10, maxiters = 200)
-        @test sol1.errors[:l2] < 0.5
+    luxchain = Chain(Dense(1, 5, σ), Dense(5, 1))
+    u_, t_ = u_analytical(ts), ts
+
+    function additional_loss(phi, θ)
+        return sum(sum(abs2, [phi(t, θ) for t in t_] .- u_)) / length(u_)
     end
 
-    @testset "StochasticTraining" begin
-        luxchain = Chain(Dense(1, 5, σ), Dense(5, 1))
-        (u_, t_) = (u_analytical(ts), ts)
-        function additional_loss(phi, θ)
-            return sum(sum(abs2, [phi(t, θ) for t in t_] .- u_)) / length(u_)
-        end
-        alg1 = NNODE(luxchain, opt; strategy = StochasticTraining(1000), additional_loss)
-        sol1 = solve(prob, alg1; verbose = false, abstol = 1e-8, maxiters = 500)
-        @test sol1.errors[:l2] < 0.5
+    @testset "$(nameof(typeof(strategy)))" for strategy in [
+        GridTraining(0.01),
+        StochasticTraining(1000),
+        QuadratureTraining(reltol = 1e-3, abstol = 1e-6, maxiters = 50, batch = 100)
+    ]
+        alg = NNODE(luxchain, opt; additional_loss, strategy)
+        @test begin
+            sol = solve(prob, alg; verbose = false, maxiters = 500, abstol = 1e-6)
+            sol.errors[:l2] < 0.5
+        end broken=(strategy isa QuadratureTraining)
     end
 end
 
-@testset "Parameter Estimation" begin
+@testitem "ODE Parameter Estimation" tags=[:nnode] begin
+    using OrdinaryDiffEq, Random, Lux, OptimizationOptimJL, LineSearches
+
+    Random.seed!(100)
+
     function lorenz(u, p, t)
         return [p[1] * (u[2] - u[1]),
             u[1] * (p[2] - u[3]) - u[2],
@@ -193,28 +188,28 @@ end
     prob = ODEProblem(lorenz, [1.0, 0.0, 0.0], (0.0, 1.0), [1.0, 1.0, 1.0])
     true_p = [2.0, 3.0, 2.0]
     prob2 = remake(prob, p = true_p)
-    sol = solve(prob2, Tsit5(), saveat = 0.01)
+    sol = solve(prob2, Tsit5(); saveat = 0.01)
     t_ = sol.t
     u_ = reduce(hcat, sol.u)
     function additional_loss(phi, θ)
         return sum(abs2, phi(t_, θ) .- u_) / 100
     end
     n = 8
-    luxchain = Chain(
-        Dense(1, n, σ),
-        Dense(n, n, σ),
-        Dense(n, n, σ),
-        Dense(n, 3)
-    )
-    opt = OptimizationOptimJL.BFGS(linesearch = BackTracking())
-    alg = NNODE(luxchain, opt; strategy = GridTraining(0.01),
+    luxchain = Chain(Dense(1, n, σ), Dense(n, n, σ), Dense(n, 3))
+
+    alg = NNODE(luxchain, BFGS(linesearch = BackTracking()); strategy = GridTraining(0.01),
         param_estim = true, additional_loss)
+
     sol = solve(prob, alg; verbose = false, abstol = 1e-8, maxiters = 1000, saveat = t_)
     @test sol.k.u.p≈true_p atol=1e-2
     @test reduce(hcat, sol.u)≈u_ atol=1e-2
 end
 
-@testset "Complex Numbers" begin
+@testitem "ODE Complex Numbers" tags=[:nnode] begin
+    using OrdinaryDiffEq, Random, Lux, Optimisers
+
+    Random.seed!(100)
+
     function bloch_equations(u, p, t)
         Ω, Δ, Γ = p
         γ = Γ / 2
@@ -234,41 +229,97 @@ end
     problem = ODEProblem(bloch_equations, u0, time_span, parameters)
 
     chain = Chain(
-        Dense(1, 16, tanh;
-            init_weight = (rng, a...) -> kaiming_normal(rng, ComplexF64, a...)),
-        Dense(
-            16, 4; init_weight = (rng, a...) -> kaiming_normal(rng, ComplexF64, a...))
+        Dense(1, 16, tanh; init_weight = kaiming_normal(ComplexF64)),
+        Dense(16, 4; init_weight = kaiming_normal(ComplexF64))
     )
-    ps, st = Lux.setup(rng, chain)
+    ps, st = Lux.setup(Random.default_rng(), chain)
 
-    opt = OptimizationOptimisers.Adam(0.01)
     ground_truth = solve(problem, Tsit5(), saveat = 0.01)
-    strategies = [StochasticTraining(500), GridTraining(0.01),
-        WeightedIntervalTraining([0.1, 0.4, 0.4, 0.1], 500)]
+
+    strategies = [
+        StochasticTraining(500),
+        GridTraining(0.01),
+        WeightedIntervalTraining([0.1, 0.4, 0.4, 0.1], 500)
+    ]
 
     @testset "$(nameof(typeof(strategy)))" for strategy in strategies
-        alg = NNODE(chain, opt, ps; strategy)
-        sol = solve(problem, alg, verbose = false, maxiters = 5000, saveat = 0.01)
+        alg = NNODE(chain, Adam(0.01); strategy)
+        sol = solve(problem, alg; verbose = false, maxiters = 5000, saveat = 0.01)
         @test sol.u≈ground_truth.u rtol=1e-1
     end
 
-    alg = NNODE(chain, opt, ps; strategy = QuadratureTraining())
+    alg = NNODE(chain, Adam(0.01); strategy = QuadratureTraining())
     @test_throws ErrorException solve(
-        problem, alg, verbose = false, maxiters = 5000, saveat = 0.01)
+        problem, alg; verbose = false, maxiters = 5000, saveat = 0.01)
 end
 
-@testset "Translating from Flux" begin
-    linear = (u, p, t) -> cos(2pi * t)
-    linear_analytic = (u, p, t) -> (1 / (2pi)) * sin(2pi * t)
-    tspan = (0.0, 1.0)
+@testitem "NNODE: Translating from Flux" tags=[:nnode] begin
+    using OrdinaryDiffEq, Random, Lux, Optimisers
+    import Flux
+
+    Random.seed!(100)
+
+    linear = (u, p, t) -> cospi(2t)
+    linear_analytic = (u, p, t) -> (1 / (2pi)) * sinpi(2t)
+    tspan = (0.0f0, 1.0f0)
     dt = (tspan[2] - tspan[1]) / 99
     ts = collect(tspan[1]:dt:tspan[2])
-    prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), 0.0, (0.0, 1.0))
-    opt = OptimizationOptimisers.Adam(0.1, (0.9, 0.95))
-    u_analytical(x) = (1 / (2pi)) .* sin.(2pi .* x)
+    prob = ODEProblem(
+        ODEFunction(linear; analytic = linear_analytic), 0.0f0, (0.0f0, 1.0f0))
+
+    u_analytical(x) = (1 / (2pi)) .* sinpi.(2x)
     fluxchain = Flux.Chain(Flux.Dense(1, 5, Flux.σ), Flux.Dense(5, 1))
-    alg1 = NNODE(fluxchain, opt)
-    @test alg1.chain isa AbstractLuxLayer
-    sol1 = solve(prob, alg1, verbose = false, abstol = 1e-10, maxiters = 200)
-    @test sol1.errors[:l2] < 0.5
+    opt = Adam(0.1)
+
+    alg = NNODE(fluxchain, opt)
+    @test alg.chain isa Lux.AbstractLuxLayer
+    sol = solve(prob, alg; verbose = false, abstol = 1e-10, maxiters = 200)
+    @test sol.errors[:l2] < 0.5
+end
+
+@testitem "Training Strategy with `tstops`" tags=[:nnode] begin
+    using OrdinaryDiffEq, Random, Lux, Optimisers, Statistics
+
+    Random.seed!(100)
+
+    function f(u, p, t)
+        [p[1] * u[1] - p[2] * u[1] * u[2], -p[3] * u[2] + p[4] * u[1] * u[2]]
+    end
+    p = [1.5, 1.0, 3.0, 1.0]
+    u0 = [1.0, 1.0]
+
+    tspan = (0.0, 3.0)
+    points1 = rand(280)
+    points2 = rand(80) .+ 1
+    points3 = rand(40) .+ 2
+    addedPoints = vcat(points1, points2, points3)
+
+    saveat = 0.01
+
+    prob_oop = ODEProblem{false}(f, u0, tspan, p)
+    true_sol = solve(prob_oop, Tsit5(); saveat)
+    N = 16
+    chain = Chain(Dense(1 => N, σ), Dense(N => N, σ), Dense(N => N, σ), Dense(N => N, σ),
+        Dense(N => length(u0)))
+
+    threshold = 0.2
+
+    @testset "$(nameof(typeof(strategy)))" for strategy in [
+        GridTraining(1.0),
+        WeightedIntervalTraining([0.3, 0.3, 0.4], 3),
+        StochasticTraining(3)
+    ]
+        alg = NNODE(chain, Adam(0.01); strategy, tstops = addedPoints)
+
+        @testset "Without added points" begin
+            sol = solve(prob_oop, alg; verbose = false, maxiters = 10000, saveat)
+            @test abs(mean(sol) - mean(true_sol)) ≥ threshold
+        end
+
+        @testset "With added points" begin
+            sol = solve(prob_oop, alg; verbose = false,
+                maxiters = 10000, saveat, tstops = addedPoints)
+            @test abs(mean(sol) - mean(true_sol)) < threshold
+        end
+    end
 end
diff --git a/test/NNODE_tstops_test.jl b/test/NNODE_tstops_test.jl
deleted file mode 100644
index 82f0278a5d..0000000000
--- a/test/NNODE_tstops_test.jl
+++ /dev/null
@@ -1,43 +0,0 @@
-using OrdinaryDiffEq, Lux, OptimizationOptimisers, Optimisers, Test, Statistics, NeuralPDE
-
-function fu(u, p, t)
-    [p[1] * u[1] - p[2] * u[1] * u[2], -p[3] * u[2] + p[4] * u[1] * u[2]]
-end
-
-p = [1.5, 1.0, 3.0, 1.0]
-u0 = [1.0, 1.0]
-tspan = (0.0, 3.0)
-points1 = [rand() for i in 1:280]
-points2 = [rand() + 1 for i in 1:80]
-points3 = [rand() + 2 for i in 1:40]
-addedPoints = vcat(points1, points2, points3)
-
-saveat = 0.01
-
-prob_oop = ODEProblem{false}(fu, u0, tspan, p)
-true_sol = solve(prob_oop, Tsit5(); saveat)
-N = 16
-chain = Chain(
-    Dense(1, N, σ), Dense(N, N, σ), Dense(N, N, σ), Dense(N, N, σ), Dense(N, length(u0)))
-
-opt = Adam(0.01)
-threshold = 0.2
-
-@testset "$(nameof(typeof(strategy)))" for strategy in [
-    GridTraining(1.0),
-    WeightedIntervalTraining([0.3, 0.3, 0.4], 3),
-    StochasticTraining(3)
-]
-    alg = NNODE(chain, opt; autodiff = false, strategy)
-
-    @testset "Without added points" begin
-        sol = solve(prob_oop, alg; verbose = false, maxiters = 1000, saveat)
-        @test abs(mean(sol) - mean(true_sol)) > threshold
-    end
-
-    @testset "With added points" begin
-        sol = solve(
-            prob_oop, alg; verbose = false, maxiters = 10000, saveat, tstops = addedPoints)
-        @test abs(mean(sol) - mean(true_sol)) < threshold
-    end
-end
diff --git a/test/NNPDE_tests_gpu_Lux.jl b/test/NNPDE_cuda_tests.jl
similarity index 61%
rename from test/NNPDE_tests_gpu_Lux.jl
rename to test/NNPDE_cuda_tests.jl
index 90674b23ff..8de4163c5e 100644
--- a/test/NNPDE_tests_gpu_Lux.jl
+++ b/test/NNPDE_cuda_tests.jl
@@ -1,17 +1,26 @@
-using Lux, ComponentArrays, OptimizationOptimisers, Test, NeuralPDE, Optimization, LuxCUDA,
-      QuasiMonteCarlo, Random
-import ModelingToolkit: Interval, infimum, supremum
+@testsetup module CUDATestSetup
 
-Random.seed!(100)
+using LuxCUDA, Lux
 
-callback = function (p, l)
-    println("Current loss is: $l")
+function callback(p, l)
+    if p.iter == 1 || p.iter % 250 == 0
+        println("Current loss is: $l after $(p.iter) iterations")
+    end
     return false
 end
 
 const gpud = gpu_device()
 
-@testset "ODE" begin
+export gpud, callback
+
+end
+
+@testitem "1D ODE - CUDA" tags=[:cuda] setup=[CUDATestSetup] begin
+    using Lux, Optimization, OptimizationOptimisers, Random, ComponentArrays
+    import ModelingToolkit: Interval, infimum, supremum
+
+    Random.seed!(100)
+
     @parameters θ
     @variables u(..)
     Dθ = Differential(θ)
@@ -25,41 +34,50 @@ const gpud = gpu_device()
 
     # Space and time domains
     domains = [θ ∈ Interval(0.0f0, 1.0f0)]
+
     # Discretization
     dt = 0.1f0
+
     # Neural network
     inner = 20
     chain = Chain(Dense(1, inner, σ), Dense(inner, inner, σ), Dense(inner, inner, σ),
         Dense(inner, inner, σ), Dense(inner, inner, σ), Dense(inner, 1))
 
     strategy = GridTraining(dt)
-    ps = Lux.setup(Random.default_rng(), chain)[1] |> ComponentArray |> gpud
+    ps = Lux.initialparameters(Random.default_rng(), chain) |> ComponentArray |> gpud
     discretization = PhysicsInformedNN(chain, strategy; init_params = ps)
 
     @named pde_system = PDESystem(eq, bcs, domains, [θ], [u(θ)])
     prob = discretize(pde_system, discretization)
-    res = solve(prob, OptimizationOptimisers.Adam(1e-2); maxiters = 2000)
+    res = solve(prob, Adam(1e-2); maxiters = 2000)
     phi = discretization.phi
     analytic_sol_func(t) = exp(-(t^2) / 2) / (1 + t + t^3) + t^2
     ts = [infimum(d.domain):(dt / 10):supremum(d.domain) for d in domains][1]
+
     u_real = [analytic_sol_func(t) for t in ts]
     u_predict = [first(Array(phi([t], res.u))) for t in ts]
     @test u_predict≈u_real atol=0.2
 end
 
-@testset "1D PDE Dirichlet boundary conditions" begin
+@testitem "1D PDE Dirichlet BC - CUDA" tags=[:cuda] setup=[CUDATestSetup] begin
+    using Lux, Optimization, OptimizationOptimisers, Random, ComponentArrays
+    import ModelingToolkit: Interval, infimum, supremum
+
+    Random.seed!(100)
+
     @parameters t x
     @variables u(..)
     Dt = Differential(t)
     Dxx = Differential(x)^2
 
     eq = Dt(u(t, x)) ~ Dxx(u(t, x))
-    bcs = [u(0, x) ~ cos(x),
+    bcs = [
+        u(0, x) ~ cos(x),
         u(t, 0) ~ exp(-t),
-        u(t, 1) ~ exp(-t) * cos(1)]
+        u(t, 1) ~ exp(-t) * cos(1)
+    ]
 
-    domains = [t ∈ Interval(0.0, 1.0),
-        x ∈ Interval(0.0, 1.0)]
+    domains = [t ∈ Interval(0.0, 1.0), x ∈ Interval(0.0, 1.0)]
 
     @named pdesys = PDESystem(eq, bcs, domains, [t, x], [u(t, x)])
 
@@ -69,23 +87,31 @@ end
         Dense(inner, inner, σ), Dense(inner, inner, σ), Dense(inner, 1))
 
     strategy = StochasticTraining(500)
-    ps = Lux.setup(Random.default_rng(), chain)[1] |> ComponentArray |> gpud .|> Float64
+    ps = Lux.initialparameters(Random.default_rng(), chain) |> ComponentArray |> gpud |> f64
+
     discretization = PhysicsInformedNN(chain, strategy; init_params = ps)
     prob = discretize(pdesys, discretization)
-    res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.01); maxiters = 1000)
+    res = solve(prob, Adam(0.01); maxiters = 1000)
     prob = remake(prob, u0 = res.u)
-    res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.001); maxiters = 1000)
+    res = solve(prob, Adam(0.001); maxiters = 1000)
     phi = discretization.phi
     u_exact = (t, x) -> exp.(-t) * cos.(x)
     ts, xs = [infimum(d.domain):0.01:supremum(d.domain) for d in domains]
-    u_predict = reshape([first(Array(phi([t, x], res.u))) for t in ts for x in xs],
-        (length(ts), length(xs)))
-    u_real = reshape([u_exact(t, x) for t in ts for x in xs], (length(ts), length(xs)))
+
+    u_predict = [first(Array(phi([t, x], res.u))) for t in ts for x in xs]
+    u_real = [u_exact(t, x) for t in ts for x in xs]
     diff_u = abs.(u_predict .- u_real)
+
     @test u_predict≈u_real atol=1.0
 end
 
-@testset "1D PDE Neumann boundary conditions and Float64 accuracy" begin
+@testitem "1D PDE Neumann BC - CUDA" tags=[:cuda] setup=[CUDATestSetup] begin
+    using Lux, Optimization, OptimizationOptimisers, Random, QuasiMonteCarlo,
+          ComponentArrays
+    import ModelingToolkit: Interval, infimum, supremum
+
+    Random.seed!(100)
+
     @parameters t x
     @variables u(..)
     Dt = Differential(t)
@@ -94,13 +120,14 @@ end
 
     # 1D PDE and boundary conditions
     eq = Dt(u(t, x)) ~ Dxx(u(t, x))
-    bcs = [u(0, x) ~ cos(x),
+    bcs = [
+        u(0, x) ~ cos(x),
         Dx(u(t, 0)) ~ 0.0,
-        Dx(u(t, 1)) ~ -exp(-t) * sin(1.0)]
+        Dx(u(t, 1)) ~ -exp(-t) * sin(1.0)
+    ]
 
     # Space and time domains
-    domains = [t ∈ Interval(0.0, 1.0),
-        x ∈ Interval(0.0, 1.0)]
+    domains = [t ∈ Interval(0.0, 1.0), x ∈ Interval(0.0, 1.0)]
 
     # PDE system
     @named pdesys = PDESystem(eq, bcs, domains, [t, x], [u(t, x)])
@@ -111,49 +138,57 @@ end
 
     strategy = QuasiRandomTraining(
         500; sampling_alg = SobolSample(), resampling = false, minibatch = 30)
-    ps = Lux.setup(Random.default_rng(), chain)[1] |> ComponentArray |> gpud .|> Float64
+    ps = Lux.initialparameters(Random.default_rng(), chain) |> ComponentArray |> gpud |> f64
+
     discretization = PhysicsInformedNN(chain, strategy; init_params = ps)
     prob = discretize(pdesys, discretization)
-    res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.1); maxiters = 2000)
+    res = solve(prob, Adam(0.1); maxiters = 2000)
     prob = remake(prob, u0 = res.u)
-    res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.01); maxiters = 2000)
+    res = solve(prob, Adam(0.01); maxiters = 2000)
     phi = discretization.phi
     u_exact = (t, x) -> exp(-t) * cos(x)
     ts, xs = [infimum(d.domain):0.01:supremum(d.domain) for d in domains]
-    u_predict = reshape([first(Array(phi([t, x], res.u))) for t in ts for x in xs],
-        (length(ts), length(xs)))
-    u_real = reshape([u_exact(t, x) for t in ts for x in xs], (length(ts), length(xs)))
+
+    u_predict = [first(Array(phi([t, x], res.u))) for t in ts for x in xs]
+    u_real = [u_exact(t, x) for t in ts for x in xs]
     diff_u = abs.(u_predict .- u_real)
+
     @test u_predict≈u_real atol=1.0
 end
 
-@testset "2D PDE" begin
+@testitem "2D PDE - CUDA" tags=[:cuda] setup=[CUDATestSetup] begin
+    using Lux, Optimization, OptimizationOptimisers, Random, ComponentArrays
+    import ModelingToolkit: Interval, infimum, supremum
+
+    Random.seed!(100)
+
     @parameters t x y
     @variables u(..)
     Dxx = Differential(x)^2
     Dyy = Differential(y)^2
     Dt = Differential(t)
-    t_min = 0.0
-    t_max = 2.0
-    x_min = 0.0
-    x_max = 2.0
-    y_min = 0.0
-    y_max = 2.0
+
+    t_min, t_max, x_min, x_max, y_min, y_max = 0.0, 2.0, 0.0, 2.0, 0.0, 2.0
 
     eq = Dt(u(t, x, y)) ~ Dxx(u(t, x, y)) + Dyy(u(t, x, y))
 
     analytic_sol_func(t, x, y) = exp(x + y) * cos(x + y + 4t)
+
     # Initial and boundary conditions
-    bcs = [u(t_min, x, y) ~ analytic_sol_func(t_min, x, y),
+    bcs = [
+        u(t_min, x, y) ~ analytic_sol_func(t_min, x, y),
         u(t, x_min, y) ~ analytic_sol_func(t, x_min, y),
         u(t, x_max, y) ~ analytic_sol_func(t, x_max, y),
         u(t, x, y_min) ~ analytic_sol_func(t, x, y_min),
-        u(t, x, y_max) ~ analytic_sol_func(t, x, y_max)]
+        u(t, x, y_max) ~ analytic_sol_func(t, x, y_max)
+    ]
 
     # Space and time domains
-    domains = [t ∈ Interval(t_min, t_max),
+    domains = [
+        t ∈ Interval(t_min, t_max),
         x ∈ Interval(x_min, x_max),
-        y ∈ Interval(y_min, y_max)]
+        y ∈ Interval(y_min, y_max)
+    ]
 
     # Neural network
     inner = 25
@@ -161,15 +196,17 @@ end
         Dense(inner, inner, σ), Dense(inner, inner, σ), Dense(inner, 1))
 
     strategy = GridTraining(0.05)
-    ps = Lux.setup(Random.default_rng(), chain)[1] |> ComponentArray |> gpud .|> Float64
+    ps = Lux.initialparameters(Random.default_rng(), chain) |> ComponentArray |> gpud |> f64
+
     discretization = PhysicsInformedNN(chain, strategy; init_params = ps)
     @named pde_system = PDESystem(eq, bcs, domains, [t, x, y], [u(t, x, y)])
     prob = discretize(pde_system, discretization)
-    res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.01); maxiters = 2500)
+    res = solve(prob, Adam(0.01); maxiters = 2500)
     prob = remake(prob, u0 = res.u)
-    res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.001); maxiters = 2500)
+    res = solve(prob, Adam(0.001); maxiters = 2500)
     phi = discretization.phi
     ts, xs, ys = [infimum(d.domain):0.1:supremum(d.domain) for d in domains]
+
     u_real = [analytic_sol_func(t, x, y) for t in ts for x in xs for y in ys]
     u_predict = [first(Array(phi([t, x, y], res.u))) for t in ts for x in xs
                  for y in ys]
diff --git a/test/NNPDE_tests.jl b/test/NNPDE_tests.jl
index 888179b561..bc79ce9b96 100644
--- a/test/NNPDE_tests.jl
+++ b/test/NNPDE_tests.jl
@@ -1,51 +1,14 @@
-using NeuralPDE, Test, Optimization, OptimizationOptimJL, OptimizationOptimisers, Integrals,
-      Cubature, QuasiMonteCarlo, DomainSets, Lux, LineSearches, Random
-import ModelingToolkit: Interval, infimum, supremum
-import Flux
+@testsetup module NNPDE1TestSetup
 
-Random.seed!(100)
+using NeuralPDE, Cubature, Integrals, QuasiMonteCarlo
 
-callback = function (p, l)
-    println("Current loss is: $l")
+function callback(p, l)
+    if p.iter == 1 || p.iter % 250 == 0
+        println("Current loss is: $l after $(p.iter) iterations")
+    end
     return false
 end
 
-function test_ode(strategy_)
-    println("Example 1, 1D ode: strategy: $(nameof(typeof(strategy_)))")
-    @parameters θ
-    @variables u(..)
-    Dθ = Differential(θ)
-
-    # 1D ODE
-    eq = Dθ(u(θ)) ~ θ^3 + 2 * θ + (θ^2) * ((1 + 3 * (θ^2)) / (1 + θ + (θ^3))) -
-                    u(θ) * (θ + ((1 + 3 * (θ^2)) / (1 + θ + θ^3)))
-
-    # Initial and boundary conditions
-    bcs = [u(0.0) ~ 1.0]
-
-    # Space and time domains
-    domains = [θ ∈ Interval(0.0, 1.0)]
-
-    # Neural network
-    chain = Chain(Dense(1, 12, σ), Dense(12, 1))
-
-    discretization = PhysicsInformedNN(chain, strategy_)
-    @named pde_system = PDESystem(eq, bcs, domains, [θ], [u])
-    prob = discretize(pde_system, discretization)
-
-    res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.1); maxiters = 1000)
-    prob = remake(prob, u0 = res.u)
-    res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.01); maxiters = 500)
-    prob = remake(prob, u0 = res.u)
-    res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.001); maxiters = 500)
-    phi = discretization.phi
-    analytic_sol_func(t) = exp(-(t^2) / 2) / (1 + t + t^3) + t^2
-    ts = [infimum(d.domain):0.01:supremum(d.domain) for d in domains][1]
-    u_real = [analytic_sol_func(t) for t in ts]
-    u_predict = [first(phi(t, res.u)) for t in ts]
-    @test u_predict≈u_real atol=0.1
-end
-
 grid_strategy = GridTraining(0.1)
 quadrature_strategy = QuadratureTraining(quadrature_alg = CubatureJLh(),
     reltol = 1e3, abstol = 1e-3, maxiters = 50, batch = 100)
@@ -63,13 +26,60 @@ strategies = [
     quadrature_strategy
 ]
 
-@testset "Test ODE/Heterogeneous" begin
+export callback, strategies
+
+end
+
+@testitem "Test Heterogeneous ODE" tags=[:nnpde1] setup=[NNPDE1TestSetup] begin
+    using Cubature, Integrals, QuasiMonteCarlo, DomainSets, Lux, Random, Optimisers
+
+    function simple_1d_ode(strategy)
+        @parameters θ
+        @variables u(..)
+        Dθ = Differential(θ)
+
+        # 1D ODE
+        eq = Dθ(u(θ)) ~ θ^3 + 2.0f0 * θ +
+                        (θ^2) * ((1.0f0 + 3 * (θ^2)) / (1.0f0 + θ + (θ^3))) -
+                        u(θ) * (θ + ((1.0f0 + 3.0f0 * (θ^2)) / (1.0f0 + θ + θ^3)))
+
+        # Initial and boundary conditions
+        bcs = [u(0.0) ~ 1.0f0]
+
+        # Space and time domains
+        domains = [θ ∈ Interval(0.0f0, 1.0f0)]
+
+        # Neural network
+        chain = Chain(Dense(1, 12, σ), Dense(12, 1))
+
+        discretization = PhysicsInformedNN(chain, strategy)
+        @named pde_system = PDESystem(eq, bcs, domains, [θ], [u])
+        prob = discretize(pde_system, discretization)
+
+        res = solve(prob, Adam(0.1); maxiters = 1000)
+        prob = remake(prob, u0 = res.u)
+        res = solve(prob, Adam(0.01); maxiters = 500)
+        prob = remake(prob, u0 = res.u)
+        res = solve(prob, Adam(0.001); maxiters = 500)
+        phi = discretization.phi
+
+        analytic_sol_func(t) = exp(-(t^2) / 2) / (1 + t + t^3) + t^2
+        ts = [infimum(d.domain):0.01:supremum(d.domain) for d in domains][1]
+        u_real = [analytic_sol_func(t) for t in ts]
+        u_predict = [first(phi([t], res.u)) for t in ts]
+        @test u_predict≈u_real atol=0.8
+    end
+
     @testset "$(nameof(typeof(strategy)))" for strategy in strategies
-        test_ode(strategy)
+        simple_1d_ode(strategy)
     end
 end
 
-@testset "Example 1: Heterogeneous system" begin
+@testitem "PDE I: Heterogeneous system" tags=[:nnpde1] setup=[NNPDE1TestSetup] begin
+    using DomainSets, Lux, Random, Optimisers, Integrals
+    import ModelingToolkit: Interval, infimum, supremum
+    import OptimizationOptimJL: BFGS
+
     @parameters x, y, z
     @variables u(..), v(..), h(..), p(..)
     Dz = Differential(z)
@@ -82,7 +92,7 @@ end
                                                     exp(x) * exp(z)
     ]
 
-    bcs = [u(0, 0, 0) ~ 0.0]
+    bcs = [u(0.0, 0.0, 0.0) ~ 0.0]
 
     domains = [x ∈ Interval(0.0, 1.0), y ∈ Interval(0.0, 1.0), z ∈ Interval(0.0, 1.0)]
 
@@ -104,12 +114,7 @@ end
 
     prob = discretize(pde_system, discretization)
 
-    callback = function (p, l)
-        println("Current loss is: $l")
-        return false
-    end
-
-    res = Optimization.solve(prob, OptimizationOptimJL.BFGS(); maxiters = 2000)
+    res = solve(prob, BFGS(); maxiters = 2000, callback)
 
     phi = discretization.phi
 
@@ -134,82 +139,97 @@ end
     v_predict = [phi[2]([y, x], res.u.depvar.v)[1] for y in ys for x in xs]
     h_predict = [phi[3]([z], res.u.depvar.h)[1] for z in zs]
     p_predict = [phi[4]([x, z], res.u.depvar.p)[1] for x in xs for z in zs]
+
     predict = [u_predict, v_predict, h_predict, p_predict]
+
     for i in 1:4
         @test predict[i]≈real_[i] rtol=10^-2
     end
 end
 
-function test_2d_poisson_equation(chain_, strategy_)
-    println("Example 2, 2D Poisson equation, chain: $(nameof(typeof(chain_))), strategy: $(nameof(typeof(strategy_)))")
-    @parameters x y
-    @variables u(..)
-    Dxx = Differential(x)^2
-    Dyy = Differential(y)^2
+@testitem "PDE II: 2D Poisson" tags=[:nnpde1] setup=[NNPDE1TestSetup] begin
+    using Lux, Random, Optimisers, DomainSets, Cubature, QuasiMonteCarlo, Integrals
+    import ModelingToolkit: Interval, infimum, supremum
 
-    # 2D PDE
-    eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y)
+    function test_2d_poisson_equation(chain, strategy)
+        @parameters x y
+        @variables u(..)
+        Dxx = Differential(x)^2
+        Dyy = Differential(y)^2
 
-    # Initial and boundary conditions
-    bcs = [u(0, y) ~ 0.0, u(1, y) ~ -sin(pi * 1) * sin(pi * y),
-        u(x, 0) ~ 0.0, u(x, 1) ~ -sin(pi * x) * sin(pi * 1)]
-    # Space and time domains
-    domains = [x ∈ Interval(0.0, 1.0), y ∈ Interval(0.0, 1.0)]
-    ps = Lux.setup(Random.default_rng(), chain_)[1]
-    discretization = PhysicsInformedNN(chain_, strategy_; init_params = ps)
-    @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
-    prob = discretize(pde_system, discretization)
-    res = solve(prob, OptimizationOptimisers.Adam(0.1); maxiters = 500, cb = callback)
-    phi = discretization.phi
+        # 2D PDE
+        eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y)
 
-    xs, ys = [infimum(d.domain):0.01:supremum(d.domain) for d in domains]
-    analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2)
+        # Boundary conditions
+        bcs = [
+            u(0, y) ~ 0.0,
+            u(1, y) ~ 0.0,
+            u(x, 0) ~ 0.0,
+            u(x, 1) ~ 0.0
+        ]
 
-    u_predict = reshape([first(phi([x, y], res.u)) for x in xs for y in ys],
-        (length(xs), length(ys)))
-    u_real = reshape([analytic_sol_func(x, y) for x in xs for y in ys],
-        (length(xs), length(ys)))
-    @test u_predict≈u_real atol=2.0
-end
+        # Space and time domains
+        domains = [x ∈ Interval(0.0, 1.0), y ∈ Interval(0.0, 1.0)]
+
+        ps = Lux.initialparameters(Random.default_rng(), chain)
+
+        discretization = PhysicsInformedNN(chain, strategy; init_params = ps)
+        @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
+        prob = discretize(pde_system, discretization)
+        res = solve(prob, Adam(0.1); maxiters = 500, callback)
+        phi = discretization.phi
+
+        xs, ys = [infimum(d.domain):0.01:supremum(d.domain) for d in domains]
+        analytic_sol_func(x, y) = (sinpi(x) * sinpi(y)) / (2pi^2)
+
+        u_predict = [first(phi([x, y], res.u)) for x in xs for y in ys]
+        u_real = [analytic_sol_func(x, y) for x in xs for y in ys]
+
+        @test u_predict≈u_real atol=2.0
+    end
 
-@testset "Example 2, 2D Poisson equation" begin
-    grid_strategy = GridTraining(0.1)
     chain = Chain(Dense(2, 12, σ), Dense(12, 12, σ), Dense(12, 1))
-    test_2d_poisson_equation(chain, grid_strategy)
 
     @testset "$(nameof(typeof(strategy)))" for strategy in strategies
-        chain_ = Chain(Dense(2, 12, σ), Dense(12, 12, σ), Dense(12, 1))
-        test_2d_poisson_equation(chain_, strategy)
+        test_2d_poisson_equation(chain, strategy)
     end
 
     algs = [CubatureJLp()]
     @testset "$(nameof(typeof(alg)))" for alg in algs
-        chain_ = Chain(Dense(2, 12, σ), Dense(12, 12, σ), Dense(12, 1))
-        strategy_ = NeuralPDE.QuadratureTraining(quadrature_alg = alg, reltol = 1e-4,
+        strategy = QuadratureTraining(quadrature_alg = alg, reltol = 1e-4,
             abstol = 1e-3, maxiters = 30, batch = 10)
-        test_2d_poisson_equation(chain_, strategy_)
+        test_2d_poisson_equation(chain, strategy)
     end
 end
 
-@testset "Example 3, 3rd-order ode" begin
+@testitem "PDE III: 3rd-order ODE" tags=[:nnpde1] setup=[NNPDE1TestSetup] begin
+    using Lux, Random, Optimisers, DomainSets, Cubature, QuasiMonteCarlo, Integrals
+    import ModelingToolkit: Interval, infimum, supremum
+    import OptimizationOptimJL: BFGS
+
     @parameters x
     @variables u(..), Dxu(..), Dxxu(..), O1(..), O2(..)
     Dxxx = Differential(x)^3
     Dx = Differential(x)
 
     # ODE
-    eq = Dx(Dxxu(x)) ~ cos(pi * x)
+    eq = Dx(Dxxu(x)) ~ cospi(x)
 
     # Initial and boundary conditions
-    bcs_ = [u(0.0) ~ 0.0,
-        u(1.0) ~ cos(pi),
-        Dxu(1.0) ~ 1.0]
+    bcs_ = [
+        u(0.0) ~ 0.0,
+        u(1.0) ~ cospi(1.0),
+        Dxu(1.0) ~ 1.0
+    ]
     ep = (cbrt(eps(eltype(Float64))))^2 / 6
 
-    der = [Dxu(x) ~ Dx(u(x)) + ep * O1(x),
-        Dxxu(x) ~ Dx(Dxu(x)) + ep * O2(x)]
+    der = [
+        Dxu(x) ~ Dx(u(x)) + ep * O1(x),
+        Dxxu(x) ~ Dx(Dxu(x)) + ep * O2(x)
+    ]
 
     bcs = [bcs_; der]
+
     # Space and time domains
     domains = [x ∈ Interval(0.0, 1.0)]
 
@@ -229,14 +249,16 @@ end
     pde_inner_loss_functions = sym_prob.loss_functions.pde_loss_functions
     bcs_inner_loss_functions = sym_prob.loss_functions.bc_loss_functions
 
-    cb_ = function (p, l)
-        println("loss: ", l)
-        println("pde_losses: ", map(l_ -> l_(p.u), pde_inner_loss_functions))
-        println("bcs_losses: ", map(l_ -> l_(p.u), bcs_inner_loss_functions))
+    callback = function (p, l)
+        if p.iter % 100 == 0 || p.iter == 1
+            println("loss: ", l)
+            println("pde_losses: ", map(l_ -> l_(p.u), pde_inner_loss_functions))
+            println("bcs_losses: ", map(l_ -> l_(p.u), bcs_inner_loss_functions))
+        end
         return false
     end
 
-    res = solve(prob, OptimizationOptimJL.BFGS(); maxiters = 1000)
+    res = solve(prob, BFGS(); maxiters = 1000, callback)
     phi = discretization.phi[1]
 
     analytic_sol_func(x) = (π * x * (-x + (π^2) * (2 * x - 3) + 1) - sin(π * x)) / (π^3)
@@ -248,19 +270,26 @@ end
     @test u_predict≈u_real atol=10^-4
 end
 
-@testset "Example 4, system of pde" begin
+@testitem "PDE IV: System of PDEs" tags=[:nnpde1] setup=[NNPDE1TestSetup] begin
+    using Lux, Random, Optimisers, DomainSets, Cubature, QuasiMonteCarlo, Integrals
+    import ModelingToolkit: Interval, infimum, supremum
+
     @parameters x, y
     @variables u1(..), u2(..)
     Dx = Differential(x)
     Dy = Differential(y)
 
     # System of pde
-    eqs = [Dx(u1(x, y)) + 4 * Dy(u2(x, y)) ~ 0,
-        Dx(u2(x, y)) + 9 * Dy(u1(x, y)) ~ 0]
-    # 3*u1(x,0) ~ 2*u2(x,0)]
+    eqs = [
+        Dx(u1(x, y)) + 4 * Dy(u2(x, y)) ~ 0,
+        Dx(u2(x, y)) + 9 * Dy(u1(x, y)) ~ 0
+    ]
 
     # Initial and boundary conditions
-    bcs = [u1(x, 0) ~ 2 * x, u2(x, 0) ~ 3 * x]
+    bcs = [
+        u1(x, 0) ~ 2 * x,
+        u2(x, 0) ~ 3 * x
+    ]
 
     # Space and time domains
     domains = [x ∈ Interval(0.0, 1.0), y ∈ Interval(0.0, 1.0)]
@@ -270,8 +299,7 @@ end
     chain2 = Chain(Dense(2, 15, tanh), Dense(15, 1))
 
     quadrature_strategy = QuadratureTraining(quadrature_alg = CubatureJLh(),
-        reltol = 1e-3, abstol = 1e-3,
-        maxiters = 50, batch = 100)
+        reltol = 1e-3, abstol = 1e-3, maxiters = 50, batch = 100)
     chain = [chain1, chain2]
 
     discretization = PhysicsInformedNN(chain, quadrature_strategy)
@@ -280,7 +308,7 @@ end
 
     prob = discretize(pde_system, discretization)
 
-    res = solve(prob, OptimizationOptimJL.BFGS(); maxiters = 1000)
+    res = solve(prob, Adam(0.01); maxiters = 2000, callback)
     phi = discretization.phi
 
     analytic_sol_func(x, y) = [1 / 3 * (6x - y), 1 / 2 * (6x - y)]
@@ -291,12 +319,16 @@ end
     u_predict = [[phi[i]([x, y], res.u.depvar[depvars[i]])[1] for x in xs for y in ys]
                  for i in 1:2]
 
-    @test u_predict[1]≈u_real[1] atol=0.1
-    @test u_predict[2]≈u_real[2] atol=0.1
+    @test u_predict[1]≈u_real[1] atol=0.3 norm=Base.Fix1(maximum, abs)
+    @test u_predict[2]≈u_real[2] atol=0.3 norm=Base.Fix1(maximum, abs)
 end
 
-@testset "Example 5, 2d wave equation, neumann boundary condition" begin
-    # here we use low level api for build solution
+@testitem "PDE V: 2D Wave Equation" tags=[:nnpde1] setup=[NNPDE1TestSetup] begin
+    using Lux, Random, Optimisers, DomainSets, Cubature, QuasiMonteCarlo, Integrals,
+          LineSearches, Integrals
+    import ModelingToolkit: Interval, infimum, supremum
+    import OptimizationOptimJL: BFGS
+
     @parameters x, t
     @variables u(..)
     Dxx = Differential(x)^2
@@ -337,7 +369,7 @@ end
         return false
     end
 
-    res = solve(prob, OptimizationOptimJL.BFGS(linesearch = BackTracking()); maxiters = 500)
+    res = solve(prob, BFGS(linesearch = BackTracking()); maxiters = 500, callback)
 
     dx = 0.1
     xs, ts = [infimum(d.domain):dx:supremum(d.domain) for d in domains]
@@ -352,7 +384,10 @@ end
     @test u_predict≈u_real atol=0.1
 end
 
-@testset "Example 6, pde with mixed derivative" begin
+@testitem "PDE VI: PDE with mixed derivative" tags=[:nnpde1] setup=[NNPDE1TestSetup] begin
+    using Lux, Random, Optimisers, DomainSets, Cubature, QuasiMonteCarlo, Integrals
+    import ModelingToolkit: Interval, infimum, supremum
+
     @parameters x y
     @variables u(..)
     Dxx = Differential(x)^2
@@ -363,38 +398,40 @@ end
     eq = Dxx(u(x, y)) + Dx(Dy(u(x, y))) - 2 * Dyy(u(x, y)) ~ -1.0
 
     # Initial and boundary conditions
-    bcs = [u(x, 0) ~ x,
+    bcs = [
+        u(x, 0) ~ x,
         Dy(u(x, 0)) ~ x,
-        u(x, 0) ~ Dy(u(x, 0))]
+        u(x, 0) ~ Dy(u(x, 0))
+    ]
 
     # Space and time domains
     domains = [x ∈ Interval(0.0, 1.0), y ∈ Interval(0.0, 1.0)]
 
-    quadrature_strategy = QuadratureTraining()
+    strategy = StochasticTraining(1024)
     inner = 20
     chain = Chain(Dense(2, inner, tanh), Dense(inner, inner, tanh), Dense(inner, 1))
 
-    discretization = PhysicsInformedNN(chain, quadrature_strategy)
+    discretization = PhysicsInformedNN(chain, strategy)
     @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
 
     prob = discretize(pde_system, discretization)
-
-    res = solve(prob, OptimizationOptimJL.BFGS(); maxiters = 1500)
-    @show res.original
-
+    res = solve(prob, Adam(0.01); maxiters = 5000, callback)
     phi = discretization.phi
 
     analytic_sol_func(x, y) = x + x * y + y^2 / 2
     xs, ys = [infimum(d.domain):0.01:supremum(d.domain) for d in domains]
 
-    u_predict = reshape([first(phi([x, y], res.u)) for x in xs for y in ys],
-        (length(xs), length(ys)))
-    u_real = reshape([analytic_sol_func(x, y) for x in xs for y in ys],
-        (length(xs), length(ys)))
+    u_predict = [first(phi([x, y], res.u)) for x in xs for y in ys]
+    u_real = [analytic_sol_func(x, y) for x in xs for y in ys]
     @test u_predict≈u_real rtol=0.1
 end
 
-@testset "Translating from Flux" begin
+@testitem "NNPDE: Translating from Flux" tags=[:nnpde1] setup=[NNPDE1TestSetup] begin
+    using Lux, Random, Optimisers, DomainSets, Cubature, QuasiMonteCarlo, Integrals
+    import ModelingToolkit: Interval, infimum, supremum
+    import OptimizationOptimJL: BFGS
+    import Flux
+
     @parameters θ
     @variables u(..)
     Dθ = Differential(θ)
@@ -409,11 +446,11 @@ end
 
     @named pde_system = PDESystem(eq, bcs, domains, [θ], [u])
     prob = discretize(pde_system, discretization)
-    res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.1); maxiters = 1000)
+    res = solve(prob, Adam(0.1); maxiters = 1000)
     prob = remake(prob, u0 = res.u)
-    res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.01); maxiters = 500)
+    res = solve(prob, Adam(0.01); maxiters = 500)
     prob = remake(prob, u0 = res.u)
-    res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.001); maxiters = 500)
+    res = solve(prob, Adam(0.001); maxiters = 500)
     phi = discretization.phi
     analytic_sol_func(t) = exp(-(t^2) / 2) / (1 + t + t^3) + t^2
     ts = [infimum(d.domain):0.01:supremum(d.domain) for d in domains][1]
diff --git a/test/adaptive_loss_tests.jl b/test/adaptive_loss_tests.jl
index 6e9a6c059a..28f1d8d94a 100644
--- a/test/adaptive_loss_tests.jl
+++ b/test/adaptive_loss_tests.jl
@@ -1,21 +1,17 @@
-using Optimization, OptimizationOptimisers, Test, NeuralPDE, Random, DomainSets, Lux
+@testsetup module AdaptiveLossTestSetup
+using Optimization, OptimizationOptimisers, Random, DomainSets, Lux, NeuralPDE, Test,
+      TensorBoardLogger
 import ModelingToolkit: Interval, infimum, supremum
 
-nonadaptive_loss = NonAdaptiveLoss(pde_loss_weights = 1, bc_loss_weights = 1)
-gradnormadaptive_loss = GradientScaleAdaptiveLoss(100, pde_loss_weights = 1e3,
-    bc_loss_weights = 1)
-adaptive_loss = MiniMaxAdaptiveLoss(100; pde_loss_weights = 1, bc_loss_weights = 1)
-adaptive_losses = [nonadaptive_loss, gradnormadaptive_loss, adaptive_loss]
-maxiters = 4000
-seed = 60
+function solve_with_adaptive_loss(
+        adaptive_loss; haslogger = false, outdir = mktempdir(), run = 1)
+    logdir = joinpath(outdir, string(run))
+    logger = haslogger ? TBLogger(logdir) : nothing
 
-## 2D Poisson equation
-function test_2d_poisson_equation_adaptive_loss(adaptive_loss; seed = 60, maxiters = 4000)
-    Random.seed!(seed)
-    hid = 32
-    chain_ = Chain(Dense(2, hid, tanh), Dense(hid, hid, tanh), Dense(hid, 1))
-
-    strategy_ = StochasticTraining(256)
+    Random.seed!(60)
+    hid = 40
+    chain = Chain(Dense(2, hid, tanh), Dense(hid, hid, tanh), Dense(hid, 1))
+    strategy = StochasticTraining(256)
 
     @parameters x y
     @variables u(..)
@@ -23,46 +19,106 @@ function test_2d_poisson_equation_adaptive_loss(adaptive_loss; seed = 60, maxite
     Dyy = Differential(y)^2
 
     # 2D PDE
-    eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y)
+    eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sinpi(x) * sinpi(y)
 
     # Initial and boundary conditions
-    bcs = [u(0, y) ~ 0.0, u(1, y) ~ -sin(pi * 1) * sin(pi * y),
-        u(x, 0) ~ 0.0, u(x, 1) ~ -sin(pi * x) * sin(pi * 1)]
+    bcs = [
+        u(0, y) ~ 0.0,
+        u(1, y) ~ -sinpi(1) * sinpi(y),
+        u(x, 0) ~ 0.0,
+        u(x, 1) ~ -sinpi(x) * sinpi(1)
+    ]
+
     # Space and time domains
-    domains = [x ∈ Interval(0.0, 1.0),
-        y ∈ Interval(0.0, 1.0)]
+    domains = [x ∈ Interval(0.0, 1.0), y ∈ Interval(0.0, 1.0)]
 
-    iteration = [0]
-    discretization = PhysicsInformedNN(chain_, strategy_; adaptive_loss, logger = nothing,
-        iteration)
+    discretization = PhysicsInformedNN(chain, strategy; adaptive_loss, logger)
 
     @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
     prob = discretize(pde_system, discretization)
     phi = discretization.phi
+
     xs, ys = [infimum(d.domain):0.01:supremum(d.domain) for d in domains]
-    analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2)
-    u_real = reshape([analytic_sol_func(x, y) for x in xs for y in ys],
-        (length(xs), length(ys)))
+    analytic_sol_func(x, y) = (sinpi(x) * sinpi(y)) / (2pi^2)
+    u_real = [analytic_sol_func(x, y) for x in xs for y in ys]
 
     callback = function (p, l)
-        iteration[] += 1
-        if iteration[] % 100 == 0
-            @info "Current loss is: $l, iteration is $(iteration[])"
+        if p.iter % 250 == 0
+            @info "[$(nameof(typeof(adaptive_loss)))] Current loss is: $l, iteration is $(p.iter)"
+        end
+        if haslogger
+            log_value(logger, "outer_error/loss", l, step = p.iter)
+            if p.iter % 30 == 0
+                u_predict = [first(phi([x, y], p.u)) for x in xs for y in ys]
+                total_diff = sum(abs, u_predict .- u_real)
+                log_value(logger, "outer_error/total_diff", total_diff, step = p.iter)
+                log_value(logger, "outer_error/total_diff_rel",
+                    total_diff / sum(abs2, u_real), step = p.iter)
+                log_value(logger, "outer_error/total_diff_sq",
+                    sum(abs2, u_predict .- u_real), step = p.iter)
+            end
         end
         return false
     end
-    res = solve(prob, OptimizationOptimisers.Adam(0.03); maxiters, callback)
-    u_predict = reshape([first(phi([x, y], res.u)) for x in xs for y in ys],
-        (length(xs), length(ys)))
+
+    res = solve(prob, Adam(0.03); maxiters = 2000, callback)
+    u_predict = [first(phi([x, y], res.u)) for x in xs for y in ys]
+
     total_diff = sum(abs, u_predict .- u_real)
     total_u = sum(abs, u_real)
     total_diff_rel = total_diff / total_u
-    return (; error = total_diff, total_diff_rel)
+
+    return total_diff_rel
+end
+
+export solve_with_adaptive_loss
+
+end
+
+@testitem "2D Poisson: NonAdaptiveLoss" tags=[:adaptiveloss] setup=[AdaptiveLossTestSetup] begin
+    loss = NonAdaptiveLoss(pde_loss_weights = 1, bc_loss_weights = 1)
+
+    tmpdir = mktempdir()
+
+    total_diff_rel = solve_with_adaptive_loss(
+        loss; haslogger = false, outdir = tmpdir, run = 1)
+    @test total_diff_rel < 0.4
+    @test length(readdir(tmpdir)) == 0
+
+    total_diff_rel = solve_with_adaptive_loss(
+        loss; haslogger = true, outdir = tmpdir, run = 2)
+    @test total_diff_rel < 0.4
+    @test length(readdir(tmpdir)) == 1
 end
 
-@testset "$(nameof(typeof(adaptive_loss)))" for adaptive_loss in adaptive_losses
-    error_results_no_logs = test_2d_poisson_equation_adaptive_loss(
-        adaptive_loss; seed, maxiters)
+@testitem "2D Poisson: GradientScaleAdaptiveLoss" tags=[:adaptiveloss] setup=[AdaptiveLossTestSetup] begin
+    loss = GradientScaleAdaptiveLoss(100, pde_loss_weights = 1e3, bc_loss_weights = 1)
+
+    tmpdir = mktempdir()
+
+    total_diff_rel = solve_with_adaptive_loss(
+        loss; haslogger = false, outdir = tmpdir, run = 1)
+    @test total_diff_rel < 0.4
+    @test length(readdir(tmpdir)) == 0
+
+    total_diff_rel = solve_with_adaptive_loss(
+        loss; haslogger = true, outdir = tmpdir, run = 2)
+    @test total_diff_rel < 0.4
+    @test length(readdir(tmpdir)) == 1
+end
+
+@testitem "2D Poisson: MiniMaxAdaptiveLoss" tags=[:adaptiveloss] setup=[AdaptiveLossTestSetup] begin
+    loss = MiniMaxAdaptiveLoss(100; pde_loss_weights = 1, bc_loss_weights = 1)
+
+    tmpdir = mktempdir()
+
+    total_diff_rel = solve_with_adaptive_loss(
+        loss; haslogger = false, outdir = tmpdir, run = 1)
+    @test total_diff_rel < 0.4
+    @test length(readdir(tmpdir)) == 0
 
-    @test error_results_no_logs[:total_diff_rel] < 0.4
+    total_diff_rel = solve_with_adaptive_loss(
+        loss; haslogger = true, outdir = tmpdir, run = 2)
+    @test total_diff_rel < 0.4
+    @test length(readdir(tmpdir)) == 1
 end
diff --git a/test/additional_loss_tests.jl b/test/additional_loss_tests.jl
index 25e67466af..7102f0ba8a 100644
--- a/test/additional_loss_tests.jl
+++ b/test/additional_loss_tests.jl
@@ -1,24 +1,23 @@
-using NeuralPDE, Test, Optimization, OptimizationOptimJL, OptimizationOptimisers,
-      QuasiMonteCarlo, Random, DomainSets, Integrals, Cubature, OrdinaryDiffEq,
-      ComponentArrays, Lux
-import ModelingToolkit: Interval, infimum, supremum
+@testitem "Fokker-Planck" tags=[:nnpde2] begin
+    using Optimization, OptimizationOptimisers, Random, DomainSets, Lux, ComponentArrays,
+          Integrals, Cubature
+    import ModelingToolkit: Interval, infimum, supremum
+    using OptimizationOptimJL: BFGS, LBFGS
 
-@testset "Fokker-Planck" begin
     # the example took from this article https://arxiv.org/abs/1910.10503
     @parameters x
     @variables p(..)
     Dx = Differential(x)
     Dxx = Differential(x)^2
-    α = 0.3
-    β = 0.5
-    _σ = 0.5
-    # Discretization
+
+    α, β, _σ = 0.3, 0.5, 0.5
     dx = 0.01
+
     # here we use normalization condition: dx*p(x) ~ 1, in order to get non-zero solution.
     # (α - 3*β*x^2)*p(x) + (α*x - β*x^3)*Dx(p(x)) ~ (_σ^2/2)*Dxx(p(x))
     eq = [Dx((α * x - β * x^3) * p(x)) ~ (_σ^2 / 2) * Dxx(p(x))]
-    x_0 = -2.2
-    x_end = 2.2
+    x_0, x_end = -2.2, 2.2
+
     # Initial and boundary conditions
     bcs = [p(x_0) ~ 0.0, p(x_end) ~ 0.0]
 
@@ -28,77 +27,93 @@ import ModelingToolkit: Interval, infimum, supremum
     # Neural network
     inn = 18
     chain = Chain(Dense(1, inn, σ), Dense(inn, inn, σ), Dense(inn, inn, σ), Dense(inn, 1))
+
     init_params = ComponentArray{Float64}(Lux.initialparameters(
         Random.default_rng(), chain))
-    lb = [x_0]
-    ub = [x_end]
+
+    lb, ub = [x_0], [x_end]
+
     function norm_loss_function(phi, θ, p)
-        function inner_f(x, θ)
-            dx * phi(x, θ) .- 1
-        end
+        inner_f(x, θ) = dx * phi(x, θ) .- 1
         prob1 = IntegralProblem(inner_f, (lb, ub), θ)
         norm2 = solve(prob1, HCubatureJL(), reltol = 1e-8, abstol = 1e-8, maxiters = 10)
         return abs(norm2[1])
     end
-    discretization = PhysicsInformedNN(chain, GridTraining(dx); init_params = init_params,
+
+    discretization = PhysicsInformedNN(chain, GridTraining(dx); init_params,
         additional_loss = norm_loss_function)
     @named pde_system = PDESystem(eq, bcs, domains, [x], [p(x)])
     prob = discretize(pde_system, discretization)
-    sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization)
+
+    sym_prob = symbolic_discretize(pde_system, discretization)
     pde_inner_loss_functions = sym_prob.loss_functions.pde_loss_functions
     bcs_inner_loss_functions = sym_prob.loss_functions.bc_loss_functions
     phi = discretization.phi
-    cb_ = function (p, l)
-        println("loss: ", l)
-        println("pde_losses: ", map(l_ -> l_(p.u), pde_inner_loss_functions))
-        println("bcs_losses: ", map(l_ -> l_(p.u), bcs_inner_loss_functions))
-        println("additional_loss: ", norm_loss_function(phi, p.u, nothing))
+
+    callback = function (p, l)
+        if p.iter % 100 == 0
+            println("loss: ", l)
+            println("pde_losses: ", map(l_ -> l_(p.u), pde_inner_loss_functions))
+            println("bcs_losses: ", map(l_ -> l_(p.u), bcs_inner_loss_functions))
+            println("additional_loss: ", norm_loss_function(phi, p.u, nothing))
+        end
         return false
     end
-    res = solve(prob, OptimizationOptimJL.LBFGS(), maxiters = 400, callback = cb_)
-    prob = remake(prob, u0 = res.u)
-    res = solve(prob, OptimizationOptimJL.BFGS(), maxiters = 2000, callback = cb_)
+
+    res = solve(prob, LBFGS(); maxiters = 400, callback)
+    prob = remake(prob; u0 = res.u)
+    res = solve(prob, BFGS(); maxiters = 2000, callback)
+
     C = 142.88418699042
     analytic_sol_func(x) = C * exp((1 / (2 * _σ^2)) * (2 * α * x^2 - β * x^4))
     xs = [infimum(d.domain):dx:supremum(d.domain) for d in domains][1]
     u_real = [analytic_sol_func(x) for x in xs]
+
     u_predict = [first(phi(x, res.u)) for x in xs]
     @test u_predict≈u_real rtol=1e-3
 
-    ### No init_params
     discretization = PhysicsInformedNN(
         chain, GridTraining(dx); additional_loss = norm_loss_function)
     @named pde_system = PDESystem(eq, bcs, domains, [x], [p(x)])
     prob = discretize(pde_system, discretization)
-    sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization)
+
+    sym_prob = symbolic_discretize(pde_system, discretization)
     pde_inner_loss_functions = sym_prob.loss_functions.pde_loss_functions
     bcs_inner_loss_functions = sym_prob.loss_functions.bc_loss_functions
     phi = discretization.phi
-    cb_ = function (p, l)
-        println("loss: ", l)
-        println("pde_losses: ", map(l_ -> l_(p.u), pde_inner_loss_functions))
-        println("bcs_losses: ", map(l_ -> l_(p.u), bcs_inner_loss_functions))
-        println("additional_loss: ", norm_loss_function(phi, p.u, nothing))
+
+    callback = function (p, l)
+        if p.iter % 100 == 0
+            println("loss: ", l)
+            println("pde_losses: ", map(l_ -> l_(p.u), pde_inner_loss_functions))
+            println("bcs_losses: ", map(l_ -> l_(p.u), bcs_inner_loss_functions))
+            println("additional_loss: ", norm_loss_function(phi, p.u, nothing))
+        end
         return false
     end
-    res = solve(prob, OptimizationOptimJL.LBFGS(), maxiters = 400, callback = cb_)
-    prob = remake(prob, u0 = res.u)
-    res = solve(prob, OptimizationOptimJL.BFGS(), maxiters = 2000, callback = cb_)
-    C = 142.88418699042
-    analytic_sol_func(x) = C * exp((1 / (2 * _σ^2)) * (2 * α * x^2 - β * x^4))
-    xs = [infimum(d.domain):dx:supremum(d.domain) for d in domains][1]
-    u_real = [analytic_sol_func(x) for x in xs]
+
+    res = solve(prob, LBFGS(); maxiters = 400, callback)
+    prob = remake(prob; u0 = res.u)
+    res = solve(prob, BFGS(); maxiters = 2000, callback)
+
     u_predict = [first(phi(x, res.u)) for x in xs]
     @test u_predict≈u_real rtol=1e-3
 end
 
-@testset "Lorenz System" begin
+@testitem "Lorenz System" tags=[:nnpde2] begin
+    using Optimization, OptimizationOptimisers, Random, DomainSets, Lux, ComponentArrays,
+          OrdinaryDiffEq
+    import ModelingToolkit: Interval, infimum, supremum
+    using OptimizationOptimJL: BFGS
+
     @parameters t, σ_, β, ρ
     @variables x(..), y(..), z(..)
     Dt = Differential(t)
-    eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
+    eqs = [
+        Dt(x(t)) ~ σ_ * (y(t) - x(t)),
         Dt(y(t)) ~ x(t) * (ρ - z(t)) - y(t),
-        Dt(z(t)) ~ x(t) * y(t) - β * z(t)]
+        Dt(z(t)) ~ x(t) * y(t) - β * z(t)
+    ]
 
     bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
     domains = [t ∈ Interval(0.0, 1.0)]
@@ -107,7 +122,7 @@ end
     input_ = length(domains)
     n = 12
     chain = [Chain(Dense(input_, n, tanh), Dense(n, n, σ), Dense(n, 1)) for _ in 1:3]
-    #Generate Data
+
     function lorenz!(du, u, p, t)
         du[1] = 10.0 * (u[2] - u[1])
         du[2] = u[1] * (28.0 - u[3]) - u[2]
@@ -120,97 +135,99 @@ end
     sol = solve(prob, Tsit5(), dt = 0.1)
     ts = [infimum(d.domain):dt:supremum(d.domain) for d in domains][1]
 
-    function getData(sol)
-        data = []
-        us = hcat(sol(ts).u...)
-        ts_ = hcat(sol(ts).t...)
-        return [us, ts_]
-    end
-
-    data = getData(sol)
+    data = [reduce(hcat, sol.u), reduce(hcat, sol.t)]
 
-    #Additional Loss Function
-    init_params = [Float64.(ComponentArray(Lux.setup(Random.default_rng(), chain[i])[1]))
+    init_params = [ComponentArray{Float64}(Lux.initialparameters(
+                       Random.default_rng(), chain[i]))
                    for i in 1:3]
+
     names = (:x, :y, :z)
     flat_init_params = ComponentArray(NamedTuple{names}(i for i in init_params))
 
     acum = [0; accumulate(+, length.(init_params))]
     sep = [(acum[i] + 1):acum[i + 1] for i in 1:(length(acum) - 1)]
-    (u_, t_) = data
+    u_, t_ = data
     len = length(data[2])
 
     function additional_loss(phi, θ, p)
-        return sum(sum(abs2, phi[i](t_, getproperty(θ, names[i])) .- u_[[i], :]) /
-                   len
-        for i in 1:1:3)
+        return sum(1:3) do i
+            sum(abs2, phi[i](t_, getproperty(θ, names[i])) .- u_[[i], :]) / len
+        end
     end
 
     discretization = PhysicsInformedNN(chain, GridTraining(dt);
         init_params = flat_init_params, param_estim = true, additional_loss)
 
-    additional_loss(discretization.phi, flat_init_params, nothing)
     @named pde_system = PDESystem(eqs, bcs, domains,
         [t], [x(t), y(t), z(t)], [σ_, ρ, β],
         defaults = Dict([p => 1.0 for p in [σ_, ρ, β]]))
+
     prob = discretize(pde_system, discretization)
-    sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization)
-    sym_prob.loss_functions.full_loss_function(
-        ComponentArray(depvar = flat_init_params, p = ones(3)), Float64[])
+    sym_prob = symbolic_discretize(pde_system, discretization)
 
-    res = solve(prob, OptimizationOptimJL.BFGS(); maxiters = 6000)
+    res = solve(prob, BFGS(); maxiters = 6000)
     p_ = res.u[(end - 2):end]
-    @test sum(abs2, p_[1] - 10.00) < 0.1
-    @test sum(abs2, p_[2] - 28.00) < 0.1
+    @test sum(abs2, p_[1] - 10.0) < 0.1
+    @test sum(abs2, p_[2] - 28.0) < 0.1
     @test sum(abs2, p_[3] - (8 / 3)) < 0.1
 
-    ### No init_params
     discretization = PhysicsInformedNN(
         chain, GridTraining(dt); param_estim = true, additional_loss)
 
-    additional_loss(discretization.phi, flat_init_params, nothing)
     @named pde_system = PDESystem(eqs, bcs, domains,
         [t], [x(t), y(t), z(t)], [σ_, ρ, β],
         defaults = Dict([p => 1.0 for p in [σ_, ρ, β]]))
+
     prob = discretize(pde_system, discretization)
-    sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization)
-    sym_prob.loss_functions.full_loss_function(sym_prob.flat_init_params, nothing)
-    res = solve(prob, OptimizationOptimJL.BFGS(); maxiters = 6000)
+    sym_prob = symbolic_discretize(pde_system, discretization)
+
+    res = solve(prob, BFGS(); maxiters = 6000)
     p_ = res.u[(end - 2):end]
-    @test sum(abs2, p_[1] - 10.00) < 0.1
-    @test sum(abs2, p_[2] - 28.00) < 0.1
+    @test sum(abs2, p_[1] - 10.0) < 0.1
+    @test sum(abs2, p_[2] - 28.0) < 0.1
     @test sum(abs2, p_[3] - (8 / 3)) < 0.1
 end
 
-@testset "Approximation from data and additional_loss" begin
+@testitem "Approximation from data and additional_loss" tags=[:nnpde2] begin
+    using Optimization, OptimizationOptimisers, Random, DomainSets, Optimisers,
+          ModelingToolkit, OrdinaryDiffEq, LinearAlgebra, Lux
+    import ModelingToolkit: Interval, infimum, supremum
+    import OptimizationOptimJL: BFGS
+
     @parameters x
     @variables u(..)
+
     eq = [u(0) ~ u(0)]
     bc = [u(0) ~ u(0)]
     x0 = 0
     x_end = pi
     dx = pi / 10
+
     domain = [x ∈ Interval(x0, x_end)]
     hidden = 10
+
     chain = Chain(Dense(1, hidden, tanh), Dense(hidden, hidden, sin),
         Dense(hidden, hidden, tanh), Dense(hidden, 1))
+
     strategy = GridTraining(dx)
     xs = collect(x0:dx:x_end)'
-    aproxf_(x) = @. cos(pi * x)
-    data = aproxf_(xs)
-    function additional_loss_(phi, θ, p)
-        sum(abs2, phi(xs, θ) .- data)
-    end
-    discretization = PhysicsInformedNN(chain, strategy; additional_loss = additional_loss_)
+
+    aproxf(x) = @. cospi(x)
+    data = aproxf(xs)
+
+    u_ = (cord, θ, phi) -> sum(phi(cord, θ))
+
+    additional_loss(phi, θ, p) = sum(abs2, phi(xs, θ) .- data)
+
+    discretization = PhysicsInformedNN(chain, strategy; additional_loss)
     @named pde_system = PDESystem(eq, bc, domain, [x], [u(x)])
     prob = discretize(pde_system, discretization)
-    sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization)
-    flat_init_params = sym_prob.flat_init_params
-    phi = discretization.phi
-    phi(xs, flat_init_params)
-    additional_loss_(phi, flat_init_params, nothing)
-    res = solve(prob, OptimizationOptimisers.Adam(0.01), maxiters = 500)
+    sym_prob = symbolic_discretize(pde_system, discretization)
+
+    res = solve(prob, Adam(0.01); maxiters = 500)
     prob = remake(prob, u0 = res.u)
-    res = solve(prob, OptimizationOptimJL.BFGS(), maxiters = 500)
-    @test phi(xs, res.u)≈aproxf_(xs) rtol=0.01
+    res = solve(prob, BFGS(); maxiters = 500)
+    phi = discretization.phi
+
+    @test phi(xs, res.u)≈aproxf(xs) rtol=0.01
 end
diff --git a/test/dgm_test.jl b/test/dgm_tests.jl
similarity index 74%
rename from test/dgm_test.jl
rename to test/dgm_tests.jl
index 2d458ec39c..45a886d3dd 100644
--- a/test/dgm_test.jl
+++ b/test/dgm_tests.jl
@@ -1,10 +1,8 @@
-using NeuralPDE, Test
+@testitem "Poisson's equation" tags=[:dgm] begin
+    using ModelingToolkit, Optimization, OptimizationOptimisers, Distributions,
+          MethodOfLines, OrdinaryDiffEq, LinearAlgebra
+    import ModelingToolkit: Interval, infimum, supremum
 
-using ModelingToolkit, Optimization, OptimizationOptimisers, Distributions, MethodOfLines,
-      OrdinaryDiffEq, LinearAlgebra
-import ModelingToolkit: Interval, infimum, supremum
-
-@testset "Poisson's equation" begin
     @parameters x y
     @variables u(..)
     Dxx = Differential(x)^2
@@ -30,31 +28,26 @@ import ModelingToolkit: Interval, infimum, supremum
         return false
     end
 
-    res = Optimization.solve(
-        prob, OptimizationOptimisers.Adam(0.01); callback, maxiters = 500)
+    res = solve(prob, Adam(0.01); callback, maxiters = 500)
     prob = remake(prob, u0 = res.u)
-    res = Optimization.solve(
-        prob, OptimizationOptimisers.Adam(0.001); callback, maxiters = 200)
+    res = solve(prob, Adam(0.001); callback, maxiters = 200)
     phi = discretization.phi
 
     xs, ys = [infimum(d.domain):0.01:supremum(d.domain) for d in domains]
     analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2)
 
-    u_predict = reshape([first(phi([x, y], res.u)) for x in xs for y in ys],
-        (length(xs), length(ys)))
-    u_real = reshape([analytic_sol_func(x, y) for x in xs for y in ys],
-        (length(xs), length(ys)))
+    u_predict = [first(phi([x, y], res.u)) for x in xs for y in ys]
+    u_real = [analytic_sol_func(x, y) for x in xs for y in ys]
 
     @test u_real≈u_predict atol=0.4
 end
 
-@testset "Black-Scholes PDE: European Call Option" begin
-    K = 50.0
-    T = 1.0
-    r = 0.05
-    σ = 0.25
-    S = 130.0
-    S_multiplier = 1.3
+@testitem "Black-Scholes PDE: European Call Option" tags=[:dgm] begin
+    using ModelingToolkit, Optimization, OptimizationOptimisers, Distributions,
+          MethodOfLines, OrdinaryDiffEq, LinearAlgebra
+    import ModelingToolkit: Interval, infimum, supremum
+
+    K, T, r, σ, S, S_multiplier = 50.0, 1.0, 0.05, 0.25, 130.0, 1.3
 
     @parameters x t
     @variables g(..)
@@ -81,9 +74,9 @@ end
         return false
     end
 
-    res = Optimization.solve(prob, Adam(0.1); callback, maxiters = 100)
+    res = solve(prob, Adam(0.1); callback, maxiters = 100)
     prob = remake(prob, u0 = res.u)
-    res = Optimization.solve(prob, Adam(0.01); callback, maxiters = 500)
+    res = solve(prob, Adam(0.01); callback, maxiters = 500)
     phi = discretization.phi
 
     function analytical_soln(t, x, K, σ, T)
@@ -102,7 +95,11 @@ end
     @test u_predict≈u_real rtol=0.05
 end
 
-@testset "Burger's equation" begin
+@testitem "Burger's equation" tags=[:dgm] begin
+    using ModelingToolkit, Optimization, OptimizationOptimisers, Distributions,
+          MethodOfLines, OrdinaryDiffEq, LinearAlgebra
+    import ModelingToolkit: Interval, infimum, supremum
+
     @parameters x t
     @variables u(..)
 
@@ -143,9 +140,9 @@ end
         return false
     end
 
-    res = Optimization.solve(prob, Adam(0.01); callback = callback, maxiters = 200)
+    res = solve(prob, Adam(0.01); callback = callback, maxiters = 200)
     prob = remake(prob, u0 = res.u)
-    res = Optimization.solve(prob, Adam(0.001); callback = callback, maxiters = 100)
+    res = solve(prob, Adam(0.001); callback = callback, maxiters = 100)
     phi = discretization.phi
 
     u_predict = [first(phi([t, x], res.u)) for t in ts, x in xs]
diff --git a/test/direct_function_tests.jl b/test/direct_function_tests.jl
index a4488296c1..022cbae2ad 100644
--- a/test/direct_function_tests.jl
+++ b/test/direct_function_tests.jl
@@ -1,10 +1,10 @@
-using NeuralPDE, Test, Optimization, OptimizationOptimJL, OptimizationOptimisers,
-      QuasiMonteCarlo, DomainSets, Random, Lux, Optimisers
-import ModelingToolkit: Interval, infimum, supremum
+@testitem "Approximation of function 1D" tags=[:nnpde2] begin
+    using Optimization, OptimizationOptimisers, Random, DomainSets, Lux, Optimisers
+    import ModelingToolkit: Interval, infimum, supremum
+    import OptimizationOptimJL: BFGS
 
-Random.seed!(110)
+    Random.seed!(110)
 
-@testset "Approximation of function 1D" begin
     @parameters x
     @variables u(..)
 
@@ -28,16 +28,24 @@ Random.seed!(110)
     discretization = PhysicsInformedNN(chain, strategy)
     @named pde_system = PDESystem(eq, bc, domain, [x], [u(x)])
     prob = discretize(pde_system, discretization)
-    res = solve(prob, Optimisers.Adam(0.05), maxiters = 1000)
+    res = solve(prob, Adam(0.05), maxiters = 1000)
     prob = remake(prob, u0 = res.u)
-    res = solve(prob, OptimizationOptimJL.BFGS(initial_stepnorm = 0.01), maxiters = 500)
+    res = solve(prob, BFGS(initial_stepnorm = 0.01), maxiters = 500)
+
     @test discretization.phi(xs', res.u)≈func(xs') rtol=0.01
 end
 
-@testset "Approximation of function 1D - 2" begin
+@testitem "Approximation of function 1D - 2" tags=[:nnpde2] begin
+    using Optimization, OptimizationOptimisers, Random, DomainSets, Lux, Optimisers
+    import ModelingToolkit: Interval, infimum, supremum
+    import OptimizationOptimJL: BFGS
+
+    Random.seed!(110)
+
     @parameters x
     @variables u(..)
     func(x) = @. cos(5pi * x) * x
+
     eq = [u(x) ~ func(x)]
     bc = [u(0) ~ u(0)]
 
@@ -55,19 +63,27 @@ end
     prob = discretize(pde_system, discretization)
     res = solve(prob, OptimizationOptimisers.Adam(0.01), maxiters = 500)
     prob = remake(prob, u0 = res.u)
-    res = solve(prob, OptimizationOptimJL.BFGS(), maxiters = 1000)
+    res = solve(prob, BFGS(), maxiters = 1000)
     dx = 0.01
     xs = collect(x0:dx:x_end)
     func_s = func(xs)
     @test discretization.phi(xs', res.u)≈func(xs') rtol=0.01
 end
 
-@testset "Approximation of function 2D" begin
+@testitem "Approximation of function 2D" tags=[:nnpde2] begin
+    using Optimization, OptimizationOptimisers, Random, DomainSets, Lux, Optimisers
+    import ModelingToolkit: Interval, infimum, supremum
+    import OptimizationOptimJL: BFGS
+
+    Random.seed!(110)
+
     @parameters x, y
     @variables u(..)
     func(x, y) = -cos(x) * cos(y) * exp(-((x - pi)^2 + (y - pi)^2))
+
     eq = [u(x, y) ~ func(x, y)]
     bc = [u(0, 0) ~ u(0, 0)]
+
     x0 = -10
     x_end = 10
     y0 = -10
@@ -77,7 +93,6 @@ end
     hidden = 25
     chain = Chain(Dense(2, hidden, tanh), Dense(hidden, hidden, tanh),
         Dense(hidden, hidden, tanh), Dense(hidden, 1))
-
     strategy = GridTraining(d)
     discretization = PhysicsInformedNN(chain, strategy)
     @named pde_system = PDESystem(eq, bc, domain, [x, y], [u(x, y)])
@@ -86,9 +101,9 @@ end
     symprob.loss_functions.full_loss_function(symprob.flat_init_params, nothing)
     res = solve(prob, OptimizationOptimisers.Adam(0.01), maxiters = 500)
     prob = remake(prob, u0 = res.u)
-    res = solve(prob, OptimizationOptimJL.BFGS(), maxiters = 1000)
+    res = solve(prob, BFGS(), maxiters = 1000)
     prob = remake(prob, u0 = res.u)
-    res = solve(prob, OptimizationOptimJL.BFGS(), maxiters = 500)
+    res = solve(prob, BFGS(), maxiters = 500)
     phi = discretization.phi
     xs = collect(x0:0.1:x_end)
     ys = collect(y0:0.1:y_end)
diff --git a/test/forward_tests.jl b/test/forward_tests.jl
index 77ece61c7e..c601adff9e 100644
--- a/test/forward_tests.jl
+++ b/test/forward_tests.jl
@@ -1,7 +1,7 @@
-using Test, NeuralPDE, SciMLBase, DomainSets, Lux, Random, Zygote, ComponentArrays, Adapt
-import ModelingToolkit: Interval
+@testitem "ODE" tags=[:forward] begin
+    using DomainSets, Lux, Random, Zygote, ComponentArrays, Adapt
+    import ModelingToolkit: Interval
 
-@testset "ODE" begin
     @parameters x
     @variables u(..)
 
@@ -18,7 +18,7 @@ import ModelingToolkit: Interval
     discretization = PhysicsInformedNN(chain, strategy_; init_params)
     @named pde_system = PDESystem(eq, bcs, domains, [x], [u(x)])
     prob = discretize(pde_system, discretization)
-    sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization)
+    sym_prob = symbolic_discretize(pde_system, discretization)
 
     eqs = pde_system.eqs
     bcs = pde_system.bcs
@@ -31,8 +31,8 @@ import ModelingToolkit: Interval
     train_sets = generate_training_sets(domains, dx, eqs, bcs, eltypeθ,
         dict_indvars, dict_depvars)
 
-    pde_train_sets, bcs_train_sets = train_sets
-    pde_train_sets = Adapt.adapt(eltypeθ, pde_train_sets)[1]
+    pde_train_sets, bcs_train_sets = train_sets |> NeuralPDE.EltypeAdaptor{eltypeθ}()
+    pde_train_sets = first(pde_train_sets)
 
     train_data = pde_train_sets
     pde_loss_function = sym_prob.loss_functions.datafree_pde_loss_functions[1]
@@ -41,9 +41,13 @@ import ModelingToolkit: Interval
     @test pde_loss_function(train_data, init_params)≈dudx(train_data) rtol=1e-8
 end
 
-@testset "derivatives" begin
+@testitem "derivatives" tags=[:forward] begin
+    using DomainSets, Lux, Random, Zygote, ComponentArrays
+    import ModelingToolkit: Interval
+
     chain = Chain(Dense(2, 16, σ), Dense(16, 16, σ), Dense(16, 1))
-    init_params = Lux.setup(Random.default_rng(), chain)[1] |> ComponentArray{Float64}
+    init_params = Lux.initialparameters(Random.default_rng(), chain) |>
+                  ComponentArray{Float64}
 
     eltypeθ = eltype(init_params)
     phi = NeuralPDE.Phi(chain)
@@ -81,7 +85,10 @@ end
     @test isapprox(hess_phi[4], dphi_yy, atol = 4e-5)
 end
 
-@testset "Integral" begin
+@testitem "Integral" tags=[:forward] begin
+    using DomainSets, Lux, Random, Zygote, ComponentArrays
+    import ModelingToolkit: Interval
+
     @parameters x
     @variables u(..)
     I = Integral(x in ClosedInterval(0, Inf))
@@ -95,7 +102,7 @@ end
     discretization = PhysicsInformedNN(chain, strategy_;
         init_params = init_params)
     @named pde_system = PDESystem(eq, bcs, domains, [x], [u(x)])
-    sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization)
+    sym_prob = symbolic_discretize(pde_system, discretization)
     prob = discretize(pde_system, discretization)
     inner_loss = sym_prob.loss_functions.datafree_pde_loss_functions[1]
     exact_u = π / (3 * sqrt(3))
@@ -111,10 +118,9 @@ end
     chain = Chain(x -> x .* exp.(-x .^ 2))
     chain([1], init_params, st)
 
-    discretization = PhysicsInformedNN(chain, strategy_;
-        init_params = init_params)
+    discretization = PhysicsInformedNN(chain, strategy_; init_params)
     @named pde_system = PDESystem(eqs, bcs, domains, [x], [u(x)])
-    sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization)
+    sym_prob = symbolic_discretize(pde_system, discretization)
     prob = discretize(pde_system, discretization)
     inner_loss = sym_prob.loss_functions.datafree_pde_loss_functions[1]
     exact_u = 0
diff --git a/test/logging_tests.jl b/test/logging_tests.jl
deleted file mode 100644
index 36add38a37..0000000000
--- a/test/logging_tests.jl
+++ /dev/null
@@ -1,102 +0,0 @@
-using Test, NeuralPDE, Optimization, OptimizationOptimisers, Random, Lux
-import ModelingToolkit: Interval, infimum, supremum
-
-nonadaptive_loss = NonAdaptiveLoss(pde_loss_weights = 1, bc_loss_weights = 1)
-gradnormadaptive_loss = GradientScaleAdaptiveLoss(100, pde_loss_weights = 1e3,
-    bc_loss_weights = 1)
-adaptive_loss = MiniMaxAdaptiveLoss(100; pde_loss_weights = 1, bc_loss_weights = 1)
-adaptive_losses = [nonadaptive_loss, gradnormadaptive_loss, adaptive_loss]
-
-possible_logger_dir = mktempdir()
-if ENV["LOG_SETTING"] == "NoImport"
-    haslogger = false
-    expected_log_folders = 0
-elseif ENV["LOG_SETTING"] == "ImportNoUse"
-    using TensorBoardLogger
-    haslogger = false
-    expected_log_folders = 0
-elseif ENV["LOG_SETTING"] == "ImportUse"
-    using TensorBoardLogger
-    haslogger = true
-    expected_log_folders = 3
-end
-
-@info "has logger: $(haslogger), expected log folders: $(expected_log_folders)"
-
-function test_2d_poisson_equation_adaptive_loss(adaptive_loss, run, outdir, haslogger;
-        seed = 60, maxiters = 800)
-    logdir = joinpath(outdir, string(run))
-    logger = haslogger ? TBLogger(logdir) : nothing
-
-    Random.seed!(seed)
-    hid = 40
-    chain_ = Chain(Dense(2, hid, σ), Dense(hid, hid, σ), Dense(hid, 1))
-    strategy_ = StochasticTraining(256)
-
-    @parameters x y
-    @variables u(..)
-    Dxx = Differential(x)^2
-    Dyy = Differential(y)^2
-
-    # 2D PDE
-    eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sinpi(x) * sinpi(y)
-
-    # Initial and boundary conditions
-    bcs = [u(0, y) ~ 0.0, u(1, y) ~ -sinpi(1) * sinpi(y),
-        u(x, 0) ~ 0.0, u(x, 1) ~ -sinpi(x) * sinpi(1)]
-    # Space and time domains
-    domains = [x ∈ Interval(0.0, 1.0), y ∈ Interval(0.0, 1.0)]
-
-    discretization = PhysicsInformedNN(chain_, strategy_; adaptive_loss, logger)
-
-    @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
-    prob = NeuralPDE.discretize(pde_system, discretization)
-    phi = discretization.phi
-
-    xs, ys = [infimum(d.domain):0.01:supremum(d.domain) for d in domains]
-    sz = (length(xs), length(ys))
-    analytic_sol_func(x, y) = (sinpi(x) * sinpi(y)) / (2pi^2)
-    u_real = reshape([analytic_sol_func(x, y) for x in xs for y in ys], sz)
-
-    callback = function (p, l)
-        if p.iter % 100 == 0
-            @info "Current loss is: $l, iteration is $(p.iter)"
-        end
-        if haslogger
-            log_value(logger, "outer_error/loss", l, step = p.iter)
-            if p.iter % 30 == 0
-                u_predict = reshape([first(phi([x, y], p.u)) for x in xs for y in ys],
-                    (length(xs), length(ys)))
-                total_diff = sum(abs, u_predict .- u_real)
-                log_value(logger, "outer_error/total_diff", total_diff, step = p.iter)
-                log_value(logger, "outer_error/total_diff_rel",
-                    total_diff / sum(abs2, u_real), step = p.iter)
-                log_value(logger, "outer_error/total_diff_sq",
-                    sum(abs2, u_predict .- u_real), step = p.iter)
-            end
-        end
-        return false
-    end
-    res = solve(prob, OptimizationOptimisers.Adam(0.03); maxiters, callback)
-
-    u_predict = reshape([first(phi([x, y], res.u)) for x in xs for y in ys], sz)
-    diff_u = abs.(u_predict .- u_real)
-    total_diff = sum(diff_u)
-    total_u = sum(abs.(u_real))
-    total_diff_rel = total_diff / total_u
-
-    return (error = total_diff, total_diff_rel = total_diff_rel)
-end
-
-@testset "$(nameof(typeof(adaptive_loss)))" for (i, adaptive_loss) in enumerate(adaptive_losses)
-    test_2d_poisson_equation_adaptive_loss(adaptive_loss, i, possible_logger_dir,
-        haslogger; seed = 60, maxiters = 800)
-end
-
-@test length(readdir(possible_logger_dir)) == expected_log_folders
-if expected_log_folders > 0
-    @info "dirs at $(possible_logger_dir): $(string(readdir(possible_logger_dir)))"
-    for logdir in readdir(possible_logger_dir)
-        @test length(readdir(joinpath(possible_logger_dir, logdir))) > 0
-    end
-end
diff --git a/test/neural_adapter_tests.jl b/test/neural_adapter_tests.jl
index 609df34c29..8ff770374b 100644
--- a/test/neural_adapter_tests.jl
+++ b/test/neural_adapter_tests.jl
@@ -1,16 +1,22 @@
-using Test, NeuralPDE, Optimization, Lux, OptimizationOptimisers, Statistics,
-      ComponentArrays, Random, LinearAlgebra
-import ModelingToolkit: Interval, infimum, supremum
+@testsetup module NeuralAdapterTestSetup
 
-Random.seed!(100)
-
-callback = function (p, l)
+function callback(p, l)
     (p.iter == 1 || p.iter % 500 == 0) &&
         println("Current loss is: $l after $(p.iter) iterations")
     return false
 end
 
-@testset "Example, 2D Poisson equation with Neural adapter" begin
+export callback
+
+end
+
+@testitem "Neural Adapter: 2D Poisson" tags=[:neuraladapter] setup=[NeuralAdapterTestSetup] begin
+    using Optimization, Lux, OptimizationOptimisers, Statistics, ComponentArrays, Random,
+          LinearAlgebra
+    import ModelingToolkit: Interval, infimum, supremum
+
+    Random.seed!(100)
+
     @parameters x y
     @variables u(..)
     Dxx = Differential(x)^2
@@ -30,15 +36,12 @@ end
     domains = [x ∈ Interval(0.0, 1.0), y ∈ Interval(0.0, 1.0)]
     quadrature_strategy = QuadratureTraining(
         reltol = 1e-3, abstol = 1e-6, maxiters = 50, batch = 100)
-    inner = 8
-    af = tanh
-    chain1 = Chain(Dense(2, inner, af), Dense(inner, inner, af), Dense(inner, 1))
+    chain1 = Chain(Dense(2, 8, tanh), Dense(8, 8, tanh), Dense(8, 1))
     discretization = PhysicsInformedNN(chain1, quadrature_strategy)
 
     @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
     prob = discretize(pde_system, discretization)
-    println("Poisson equation, strategy: $(nameof(typeof(quadrature_strategy)))")
-    @time res = solve(prob, Optimisers.Adam(5e-3); callback, maxiters = 2000)
+    res = solve(prob, Adam(5e-3); callback, maxiters = 2000)
     phi = discretization.phi
 
     xs, ys = [infimum(d.domain):0.01:supremum(d.domain) for d in domains]
@@ -49,9 +52,7 @@ end
 
     @test u_predict≈u_real atol=5e-2 norm=Base.Fix2(norm, Inf)
 
-    inner_ = 8
-    af = tanh
-    chain2 = Chain(Dense(2, inner_, af), Dense(inner_, inner_, af), Dense(inner_, 1))
+    chain2 = Chain(Dense(2, 8, tanh), Dense(8, 8, tanh), Dense(8, 1))
     initp, st = Lux.setup(Random.default_rng(), chain2)
     init_params2 = ComponentArray{Float64}(initp)
 
@@ -66,7 +67,7 @@ end
     @testset "$(nameof(typeof(strategy_)))" for strategy_ in [
         grid_strategy, quadrature_strategy, stochastic_strategy, quasirandom_strategy]
         prob_ = neural_adapter(loss, init_params2, pde_system, strategy_)
-        @time res_ = solve(prob_, Optimisers.Adam(5e-3); callback, maxiters = 2000)
+        res_ = solve(prob_, Optimisers.Adam(5e-3); callback, maxiters = 2000)
         discretization = PhysicsInformedNN(chain2, strategy_; init_params = res_.u)
         phi_ = discretization.phi
 
@@ -75,7 +76,13 @@ end
     end
 end
 
-@testset "Example, 2D Poisson equation, domain decomposition" begin
+@testitem "Neural Adapter: 2D Poisson, domain decomposition" tags=[:neuraladapter] setup=[NeuralAdapterTestSetup] begin
+    using Optimization, Lux, OptimizationOptimisers, Statistics, ComponentArrays, Random,
+          LinearAlgebra
+    import ModelingToolkit: Interval, infimum, supremum
+
+    Random.seed!(100)
+
     @parameters x y
     @variables u(..)
     Dxx = Differential(x)^2
@@ -83,21 +90,22 @@ end
 
     eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sinpi(x) * sinpi(y)
 
-    bcs = [u(0, y) ~ 0.0, u(1, y) ~ -sinpi(1) * sinpi(y),
-        u(x, 0) ~ 0.0, u(x, 1) ~ -sinpi(x) * sinpi(1)]
+    bcs = [
+        u(0, y) ~ 0.0,
+        u(1, y) ~ -sinpi(1) * sinpi(y),
+        u(x, 0) ~ 0.0,
+        u(x, 1) ~ -sinpi(x) * sinpi(1)
+    ]
 
     # Space
-    x_0 = 0.0
-    x_end = 1.0
+    x_0, x_end = 0.0, 1.0
     x_domain = Interval(x_0, x_end)
     y_domain = Interval(0.0, 1.0)
     domains = [x ∈ x_domain, y ∈ y_domain]
     count_decomp = 10
 
     # Neural network
-    af = tanh
-    inner = 12
-    chains = [Chain(Dense(2, inner, af), Dense(inner, inner, af), Dense(inner, 1))
+    chains = [Chain(Dense(2, 8, tanh), Dense(8, 8, tanh), Dense(8, 1))
               for _ in 1:count_decomp]
 
     xs_ = infimum(x_domain):(1 / count_decomp):supremum(x_domain)
@@ -125,8 +133,6 @@ end
     pde_system_map = []
 
     for i in 1:count_decomp
-        println("decomposition $i")
-
         domains_ = domains_map[i]
         phi_in(cord) = phis[i - 1](cord, reses[i - 1].u)
         phi_bound(x, y) = phi_in(vcat(x, y))
@@ -135,11 +141,11 @@ end
         bcs_ = create_bcs(domains_[1].domain, phi_bound)
         @named pde_system_ = PDESystem(eq, bcs_, domains_, [x, y], [u(x, y)])
         push!(pde_system_map, pde_system_)
+
         strategy = GridTraining([0.1 / count_decomp, 0.1])
         discretization = PhysicsInformedNN(chains[i], strategy)
         prob = discretize(pde_system_, discretization)
-        @time res_ = solve(prob, Optimisers.Adam(5e-3); callback, maxiters = 2000)
-        @show res_.objective
+        res_ = solve(prob, Optimisers.Adam(5e-3); callback, maxiters = 2000)
         phi = discretization.phi
 
         push!(reses, res_)
@@ -172,13 +178,12 @@ end
         diff_u = reshape(diff_u_array, (length(xs), length(ys)))
         u_predict, diff_u
     end
+
     dx = 0.01
     u_predict, diff_u = compose_result(dx)
 
-    inner_ = 18
-    af = tanh
-    chain2 = Chain(Dense(2, inner_, af), Dense(inner_, inner_, af),
-        Dense(inner_, inner_, af), Dense(inner_, inner_, af), Dense(inner_, 1))
+    chain2 = Chain(Dense(2, 18, tanh), Dense(18, 18, tanh), Dense(18, 18, tanh),
+        Dense(18, 18, tanh), Dense(18, 1))
 
     initp, st = Lux.setup(Random.default_rng(), chain2)
     init_params2 = ComponentArray{Float64}(initp)
@@ -191,11 +196,10 @@ end
 
     prob_ = neural_adapter(
         losses, init_params2, pde_system_map, GridTraining([0.1 / count_decomp, 0.1]))
-    @time res_ = solve(prob_, OptimizationOptimisers.Adam(5e-3); callback, maxiters = 2000)
-    @show res_.objective
+    res_ = solve(prob_, Adam(5e-3); callback, maxiters = 2000)
+
     prob_ = neural_adapter(losses, res_.u, pde_system_map, GridTraining(0.01))
-    @time res_ = solve(prob_, OptimizationOptimisers.Adam(5e-3); callback, maxiters = 2000)
-    @show res_.objective
+    res_ = solve(prob_, Adam(5e-3); callback, maxiters = 2000)
 
     phi_ = NeuralPDE.Phi(chain2)
     xs, ys = [infimum(d.domain):dx:supremum(d.domain) for d in domains]
diff --git a/test/qa.jl b/test/qa_tests.jl
similarity index 68%
rename from test/qa.jl
rename to test/qa_tests.jl
index 9df0e603b2..d73f17c9dd 100644
--- a/test/qa.jl
+++ b/test/qa_tests.jl
@@ -1,11 +1,13 @@
-using NeuralPDE, Aqua, ExplicitImports
+@testitem "Aqua" tags=[:qa] begin
+    using NeuralPDE, Aqua
 
-@testset "Aqua" begin
     Aqua.test_all(NeuralPDE; ambiguities = false)
     Aqua.test_ambiguities(NeuralPDE, recursive = false)
 end
 
-@testset "ExplicitImports" begin
+@testitem "ExplicitImports" tags=[:qa] begin
+    using NeuralPDE, ExplicitImports
+
     @test check_no_implicit_imports(NeuralPDE) === nothing
     @test check_no_stale_explicit_imports(NeuralPDE) === nothing
     @test check_all_qualified_accesses_via_owners(NeuralPDE) === nothing
diff --git a/test/runtests.jl b/test/runtests.jl
index 16ebea0e05..dc2d2d7df8 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,64 +1,20 @@
-using Pkg, SafeTestsets, Test
+using ReTestItems, InteractiveUtils, Hwloc
 
-const GROUP = get(ENV, "GROUP", "All")
+@info sprint(versioninfo)
 
-@time begin
-    if GROUP == "All" || GROUP == "QA"
-        @time @safetestset "Quality Assurance" include("qa.jl")
-    end
+const GROUP = lowercase(get(ENV, "GROUP", "all"))
 
-    if GROUP == "All" || GROUP == "ODEBPINN"
-        @time @safetestset "BPINN ODE solver" include("BPINN_Tests.jl")
-    end
+const RETESTITEMS_NWORKERS = parse(
+    Int, get(ENV, "RETESTITEMS_NWORKERS", string(min(Hwloc.num_physical_cores(), 4))))
+const RETESTITEMS_NWORKER_THREADS = parse(Int,
+    get(ENV, "RETESTITEMS_NWORKER_THREADS",
+        string(max(Hwloc.num_virtual_cores() ÷ RETESTITEMS_NWORKERS, 1))))
 
-    if GROUP == "All" || GROUP == "PDEBPINN"
-        @time @safetestset "BPINN PDE solver" include("BPINN_PDE_tests.jl")
-        @time @safetestset "BPINN PDE invaddloss solver" include("BPINN_PDEinvsol_tests.jl")
-    end
+using NeuralPDE
 
-    if GROUP == "All" || GROUP == "NNPDE1"
-        @time @safetestset "NNPDE" include("NNPDE_tests.jl")
-    end
+@info "Running tests with $(RETESTITEMS_NWORKERS) workers and \
+       $(RETESTITEMS_NWORKER_THREADS) threads for group $(GROUP)"
 
-    if GROUP == "All" || GROUP == "NNODE"
-        @time @safetestset "NNODE" include("NNODE_tests.jl")
-        @time @safetestset "NNODE_tstops" include("NNODE_tstops_test.jl")
-        @time @safetestset "NNDAE" include("NNDAE_tests.jl")
-    end
-
-    if GROUP == "All" || GROUP == "NNPDE2"
-        @time @safetestset "Additional Loss" include("additional_loss_tests.jl")
-        @time @safetestset "Direction Function Approximation" include("direct_function_tests.jl")
-    end
-
-    if GROUP == "All" || GROUP == "NeuralAdapter"
-        @time @safetestset "NeuralAdapter" include("neural_adapter_tests.jl")
-    end
-
-    if GROUP == "All" || GROUP == "IntegroDiff"
-        @time @safetestset "IntegroDiff" include("IDE_tests.jl")
-    end
-
-    if GROUP == "All" || GROUP == "AdaptiveLoss"
-        @time @safetestset "AdaptiveLoss" include("adaptive_loss_tests.jl")
-    end
-
-    if GROUP == "All" || GROUP == "Forward"
-        @time @safetestset "Forward" include("forward_tests.jl")
-    end
-
-    if GROUP == "All" || GROUP == "Logging"
-        @testset for log_setting in ["NoImport", "ImportNoUse", "ImportUse"]
-            ENV["LOG_SETTING"] = log_setting
-            @time @safetestset "Logging" include("logging_tests.jl")
-        end
-    end
-
-    if GROUP == "CUDA"
-        @safetestset "NNPDE_gpu_Lux" include("NNPDE_tests_gpu_Lux.jl")
-    end
-
-    if GROUP == "All" || GROUP == "DGM"
-        @time @safetestset "Deep Galerkin solver" include("dgm_test.jl")
-    end
-end
+ReTestItems.runtests(NeuralPDE; tags = (GROUP == "all" ? nothing : [Symbol(GROUP)]),
+    nworkers = RETESTITEMS_NWORKERS,
+    nworker_threads = RETESTITEMS_NWORKER_THREADS, testitem_timeout = 3600)

From c4330a7c3f5c9f444fcfc7adee55e19cd2a09e19 Mon Sep 17 00:00:00 2001
From: Avik Pal <avik.pal.2017@gmail.com>
Date: Thu, 17 Oct 2024 21:27:08 -0400
Subject: [PATCH 055/107] chore: bump version for release

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index f060266905..304d6efb97 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "NeuralPDE"
 uuid = "315f7962-48a3-4962-8226-d0f33b1235f0"
 authors = ["Chris Rackauckas <accounts@chrisrackauckas.com>"]
-version = "5.16.0"
+version = "5.17.0"
 
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"

From 699fd7d6f9dd47614864899e360badcf0453f147 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Fri, 18 Oct 2024 13:53:23 +0530
Subject: [PATCH 056/107] changes from reviews

---
 src/PDE_BPINN.jl    |   2 +-
 test/BPINN_Tests.jl | 167 +++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 166 insertions(+), 3 deletions(-)

diff --git a/src/PDE_BPINN.jl b/src/PDE_BPINN.jl
index 044080118e..8957df889b 100644
--- a/src/PDE_BPINN.jl
+++ b/src/PDE_BPINN.jl
@@ -134,7 +134,7 @@ end
 
 function LogDensityProblems.logdensity(Tar::PDELogTargetDensity, θ)
     # for parameter estimation neccesarry to use multioutput case
-    if Tar.L2_loss2 isa Nothing
+    if Tar.L2_loss2 === nothing
         return Tar.full_loglikelihood(setparameters(Tar, θ), Tar.allstd) +
                priorlogpdf(Tar, θ) + L2LossData(Tar, θ)
     else
diff --git a/test/BPINN_Tests.jl b/test/BPINN_Tests.jl
index 1f5672d3f4..657776bb29 100644
--- a/test/BPINN_Tests.jl
+++ b/test/BPINN_Tests.jl
@@ -6,7 +6,7 @@ using Statistics, Random, Functors, ComponentArrays
 using NeuralPDE, MonteCarloMeasurements
 using Flux
 
-# note that current testing bounds can be easily further tightened but have been inflated for support for Julia build v1
+# note that current testing bounds can be further tightened but have been inflated for support for Julia build v1
 # on latest Julia version it performs much better for below tests
 Random.seed!(100)
 
@@ -346,4 +346,167 @@ end
     # estimated parameters(lux chain)
     param3 = sol3lux_pestim.estimated_de_params[1]
     @test abs(param3 - p) < abs(0.2 * p)
-end
\ No newline at end of file
+end
+
+@testset "Example 4 - improvement" begin
+    function lotka_volterra(u, p, t)
+        # Model parameters.
+        α, β, γ, δ = p
+        # Current state.
+        x, y = u
+
+        # Evaluate differential equations.
+        dx = (1 - β * y) * x * α # prey
+        dy = (δ * x - 1) * y * γ  # predator
+
+        return [dx, dy]
+    end
+
+    # initial-value problem.
+    u0 = [1.0, 1.0]
+    p = [1.5, 2 / 3, 3.0, 1 / 3]
+    tspan = (0.0, 4.0)
+    prob = ODEProblem(lotka_volterra, u0, tspan, p)
+
+    # Solve using OrdinaryDiffEq.jl solver
+    dt = 0.2
+    solution = solve(prob, Tsit5(); saveat = dt)
+
+    times = solution.t
+    u = hcat(solution.u...)
+    x = u[1, :] + (0.8 .* randn(length(u[1, :])))
+    y = u[2, :] + (0.8 .* randn(length(u[2, :])))
+    dataset = [x, y, times]
+
+    chain = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),
+        Lux.Dense(6, 2))
+
+    alg1 = BNNODE(chain;
+        dataset = dataset,
+        draw_samples = 1000,
+        l2std = [0.05, 0.05],
+        phystd = [0.2, 0.2],
+        priorsNNw = (0.0, 1.0),
+        param = [
+            Normal(2, 0.5),
+            Normal(2, 0.5),
+            Normal(2, 0.5),
+            Normal(2, 0.5)])
+
+    alg2 = BNNODE(chain;
+        dataset = dataset,
+        draw_samples = 1000,
+        l2std = [0.05, 0.05],
+        phystd = [0.2, 0.2],
+        phynewstd = [0.3, 0.1],
+        priorsNNw = (0.0, 1.0),
+        param = [
+            Normal(2, 0.5),
+            Normal(2, 0.5),
+            Normal(2, 0.5),
+            Normal(2, 0.5)], estim_collocate = true)
+
+    @time sol_pestim1 = solve(prob, alg1; saveat = dt)
+    @time sol_pestim2 = solve(prob, alg2; saveat = dt)
+
+    unsafe_comparisons(true)
+    bitvec = abs.(p .- sol_pestim1.estimated_de_params) .>
+             abs.(p .- sol_pestim2.estimated_de_params)
+    @test bitvec == ones(size(bitvec))
+end
+
+function lotka_volterra(u, p, t)
+    # Model parameters.
+    β, δ = p
+    # Current state.
+    x, y = u
+
+    # Evaluate differential equations.
+    dx = (3 - β * y) * x # prey
+    dy = (δ * x - 3) * y # predator
+
+    return [dx, dy]
+end
+
+# initial-value problem.
+u0 = [1.0, 1.0]
+p = [2, 1]
+tspan = (0.0, 4.0)
+
+prob = ODEProblem(lotka_volterra, u0, tspan, p)
+
+# Solve using OrdinaryDiffEq.jl solver
+dt = 0.01
+solution = solve(prob, Tsit5(); saveat = dt)
+
+times = solution.t
+u = hcat(solution.u...)
+x = u[1, :] + (0.4 .* randn(length(u[1, :])))
+y = u[2, :] + (0.4 .* randn(length(u[2, :])))
+dataset = [x, y, times]
+scatter!(times, x)
+scatter!(times, y)
+chain = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),
+    Lux.Dense(6, 2))
+
+alg1 = BNNODE(chain;
+    dataset = dataset,
+    draw_samples = 1000,
+    l2std = [0.1, 0.1],
+    phystd = [0.2, 0.2],
+    priorsNNw = (0.0, 1.0),
+    param = [
+        Normal(1,1),
+        Normal(1,1),], progress = true)
+
+alg2 = BNNODE(chain;
+    dataset = dataset,
+    draw_samples = 1000,
+    l2std = [0.1, 0.1],
+    phystd = [0.2, 0.2],
+    phynewstd = [0.2, 0.2],
+    priorsNNw = (0.0, 1.0),
+    param = [
+        Normal(1,1),
+        Normal(1,1)], estim_collocate = true, progress = true)
+
+@time sol_pestim1 = solve(prob, alg1; saveat = dt)
+@time sol_pestim2 = solve(prob, alg2; saveat = dt)
+
+unsafe_comparisons(true)
+bitvec = abs.(p .- pmean(sol_pestim1.estimated_de_params)) .>
+         abs.(p .- pmean(sol_pestim2.estimated_de_params))
+@test bitvec == ones(size(bitvec))
+
+
+pmean(sol_pestim1.estimated_de_params)
+
+sol_pestim2.estimated_de_params
+sol_pestim1.estimated_de_params
+sol_pestim2.estimated_de_params
+
+sol_pestim1.estimated_de_params
+sol_pestim2.estimated_de_params
+
+sol_pestim1.estimated_de_params
+sol_pestim2.estimated_de_params
+
+p
+sol_pestim1.timepoints
+plot!(sol_pestim1.timepoints, sol_pestim1.ensemblesol[1])
+plot!(sol_pestim2.timepoints, sol_pestim2.ensemblesol[1])
+plot!(sol_pestim1.timepoints, sol_pestim1.ensemblesol[2])
+plot!(sol_pestim2.timepoints, sol_pestim2.ensemblesol[2])
+
+plot!(sol_pestim1.timepoints, pmean(sol_pestim1.ensemblesol[1]))
+plot!(sol_pestim2.timepoints, pmean(sol_pestim2.ensemblesol[1]))
+plot!(sol_pestim1.timepoints, pmean(sol_pestim1.ensemblesol[2]))
+plot!(sol_pestim2.timepoints, pmean(sol_pestim2.ensemblesol[2]))
+
+
+plot(times, u[1, :])
+plot!(times, u[2, :])
+
+plot(sol_pestim1.ensemblesol)
+
+# Parametric PDEs are ill posed problems as, non convex optimization and non global minima might be our solution
\ No newline at end of file

From 60b1351a1dff8f6460e6a6075139200e1bb62fe1 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Fri, 18 Oct 2024 20:32:57 +0530
Subject: [PATCH 057/107] conflicts

---
 Project.toml     | 103 +++++++++++++++--------
 src/BPINN_ode.jl | 214 ++++++++++++++++++-----------------------------
 2 files changed, 149 insertions(+), 168 deletions(-)

diff --git a/Project.toml b/Project.toml
index 32d95d792b..d0ddd63ccf 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,100 +1,133 @@
 name = "NeuralPDE"
 uuid = "315f7962-48a3-4962-8226-d0f33b1235f0"
 authors = ["Chris Rackauckas <accounts@chrisrackauckas.com>"]
-version = "5.16.0"
+version = "5.17.0"
 
 [deps]
+ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
 Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
 AdvancedHMC = "0bf59076-c3b1-5ca4-86bd-e02cd72cde3d"
 ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
 ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66"
+ConcreteStructs = "2569d6c7-a4a2-43d3-a901-331e8e4be471"
 Cubature = "667455a9-e2ce-5579-9412-b964f529a492"
-DiffEqNoiseProcess = "77a26b50-5914-5dd7-bc55-306e6241c503"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
 DomainSets = "5b8099bc-c8ec-5219-889f-1d9e522a28bf"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196"
 Integrals = "de52edbc-65ea-441a-8357-d3a637375a31"
+IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c"
 Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
+LuxCore = "bb33d45b-7691-41d6-9220-0943567d0623"
 MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d"
+MLDataDevices = "7e8f7934-dd98-4c1a-8fe8-92b47a384d40"
 ModelingToolkit = "961ee093-0014-501f-94e3-6117800e7a78"
 MonteCarloMeasurements = "0987c9cc-fe09-11e8-30f0-b96dd679fdca"
-Optim = "429524aa-4258-5aef-a3af-852621145aeb"
+Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2"
 Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba"
 OptimizationOptimisers = "42dfb2eb-d2b4-4451-abcd-913932933ac1"
+Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 QuasiMonteCarlo = "8a4e6c94-4038-4cdc-81c3-7e6ffdb2a71b"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+RecursiveArrayTools = "731186ca-8d62-57ce-b412-fbd966d074cd"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 RuntimeGeneratedFunctions = "7e49a35a-f44a-4d26-94aa-eba1b4ca6b47"
 SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+SymbolicIndexingInterface = "2efcf032-c050-4f8e-a9bb-153293bab1f5"
 SymbolicUtils = "d1185830-fcd6-423d-90d6-eec64667417b"
 Symbolics = "0c5d862f-8b57-4792-8d23-62f2024744c7"
-UnPack = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"
+WeightInitializers = "d49dbf32-c5c2-4618-8acc-27bb2598ef2d"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
+[weakdeps]
+TensorBoardLogger = "899adc3e-224a-11e9-021f-63837185c80f"
+
+[extensions]
+NeuralPDETensorBoardLoggerExt = "TensorBoardLogger"
+
 [compat]
+ADTypes = "1.9.0"
 Adapt = "4"
 AdvancedHMC = "0.6.1"
-Aqua = "0.8"
-ArrayInterface = "7.9"
-CUDA = "5.3"
+Aqua = "0.8.9"
+ArrayInterface = "7.11"
+CUDA = "5.5.2"
 ChainRulesCore = "1.24"
-ComponentArrays = "0.15.14"
+ComponentArrays = "0.15.16"
+ConcreteStructs = "0.2.3"
 Cubature = "1.5"
 DiffEqNoiseProcess = "5.20"
 Distributions = "0.25.107"
 DocStringExtensions = "0.9.3"
-DomainSets = "0.6, 0.7"
-Flux = "0.14.11"
+DomainSets = "0.7"
+ExplicitImports = "1.10.1"
+Flux = "0.14.22"
 ForwardDiff = "0.10.36"
-Functors = "0.4.10"
-Integrals = "4.4"
-LineSearches = "7.2"
-LinearAlgebra = "1"
+Functors = "0.4.12"
+Hwloc = "3.3.0"
+Integrals = "4.5"
+InteractiveUtils = "<0.0.1, 1"
+IntervalSets = "0.7.10"
+LineSearches = "7.3"
+LinearAlgebra = "1.10"
 LogDensityProblems = "2"
-Lux = "0.5.58"
-LuxCUDA = "0.3.2"
+Lux = "1.1.0"
+LuxCUDA = "0.3.3"
+LuxCore = "1.0.1"
+LuxLib = "1.3.2"
 MCMCChains = "6"
-MethodOfLines = "0.11"
-ModelingToolkit = "9.9"
+MLDataDevices = "1.2.0"
+MethodOfLines = "0.11.6"
+ModelingToolkit = "9.46"
 MonteCarloMeasurements = "1.1"
-Optim = "1.7.8"
-Optimization = "3.24, 4"
-OptimizationOptimJL = "0.2.1"
-OptimizationOptimisers = "0.2.1, 0.3"
-OrdinaryDiffEq = "6.74"
-Pkg = "1"
+Optimisers = "0.3.3"
+Optimization = "4"
+OptimizationOptimJL = "0.4"
+OptimizationOptimisers = "0.3"
+OrdinaryDiffEq = "6.87"
+Printf = "1.10"
 QuasiMonteCarlo = "0.3.2"
 Random = "1"
+ReTestItems = "1.29.0"
+RecursiveArrayTools = "3.27.0"
 Reexport = "1.2"
 RuntimeGeneratedFunctions = "0.5.12"
-SafeTestsets = "0.1"
-SciMLBase = "2.28"
-Statistics = "1.11"
-SymbolicUtils = "1.5, 2, 3"
-Symbolics = "5.27.1, 6"
-Test = "1"
-UnPack = "1"
-Zygote = "0.6.69"
+SciMLBase = "2.56"
+Statistics = "1.10"
+StochasticDiffEq = "6.69.1"
+SymbolicIndexingInterface = "0.3.31"
+SymbolicUtils = "3.7.2"
+Symbolics = "6.14"
+TensorBoardLogger = "0.1.24"
+Test = "1.10"
+WeightInitializers = "1.0.3"
+Zygote = "0.6.71"
 julia = "1.10"
 
 [extras]
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+DiffEqNoiseProcess = "77a26b50-5914-5dd7-bc55-306e6241c503"
+ExplicitImports = "7d51a73a-1435-4ff3-83d9-f097790105c7"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
+Hwloc = "0e44f5e4-bd66-52a0-8798-143a42290a1d"
+InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 LineSearches = "d3d80556-e9d4-5f37-9878-2ab0fcc64255"
 LuxCUDA = "d0bbae9a-e099-4d5b-a835-1c6931763bda"
+LuxCore = "bb33d45b-7691-41d6-9220-0943567d0623"
+LuxLib = "82251201-b29d-42c6-8e01-566dec8acb11"
 MethodOfLines = "94925ecb-adb7-4558-8ed8-f975c56a0bf4"
 OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e"
 OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
-Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
-SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
+ReTestItems = "817f1d60-ba6b-4fd5-9520-3cf149f6a823"
+StochasticDiffEq = "789caeaf-c7a9-5a7d-9973-96adeb23e2a0"
+TensorBoardLogger = "899adc3e-224a-11e9-021f-63837185c80f"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Aqua", "Test", "CUDA", "SafeTestsets", "OptimizationOptimJL", "Pkg", "OrdinaryDiffEq", "LineSearches", "LuxCUDA", "Flux", "MethodOfLines"]
+test = ["Aqua", "CUDA", "DiffEqNoiseProcess", "ExplicitImports", "Flux", "Hwloc", "InteractiveUtils", "LineSearches", "LuxCUDA", "LuxCore", "LuxLib", "MethodOfLines", "OptimizationOptimJL", "OrdinaryDiffEq", "ReTestItems", "StochasticDiffEq", "TensorBoardLogger", "Test"]
\ No newline at end of file
diff --git a/src/BPINN_ode.jl b/src/BPINN_ode.jl
index 39bb0aac72..243d681298 100644
--- a/src/BPINN_ode.jl
+++ b/src/BPINN_ode.jl
@@ -1,16 +1,18 @@
 # HIGH level API for BPINN ODE solver
 
 """
-    BNNODE(chain, Kernel = HMC; strategy = nothing, draw_samples = 2000,
-                        priorsNNw = (0.0, 2.0), param = [nothing], l2std = [0.05],
-                        phystd = [0.05], dataset = [nothing], physdt = 1 / 20.0,
-                        MCMCargs = (n_leapfrog=30), nchains = 1, init_params = nothing,
-                        Adaptorkwargs = (Adaptor = StanHMCAdaptor, targetacceptancerate = 0.8, Metric = DiagEuclideanMetric),
-                        Integratorkwargs = (Integrator = Leapfrog,), autodiff = false,
-                        progress = false, verbose = false)
-
-Algorithm for solving ordinary differential equations using a Bayesian neural network. This is a specialization
-of the physics-informed neural network which is used as a solver for a standard `ODEProblem`.
+    BNNODE(chain, kernel = HMC; strategy = nothing, draw_samples = 2000,
+           priorsNNw = (0.0, 2.0), param = [nothing], l2std = [0.05],
+           phystd = [0.05], dataset = [nothing], physdt = 1 / 20.0,
+           MCMCargs = (; n_leapfrog=30), nchains = 1, init_params = nothing,
+           Adaptorkwargs = (; Adaptor = StanHMCAdaptor, targetacceptancerate = 0.8,
+                              Metric = DiagEuclideanMetric),
+           Integratorkwargs = (Integrator = Leapfrog,), autodiff = false,
+           progress = false, verbose = false)
+
+Algorithm for solving ordinary differential equations using a Bayesian neural network. This
+is a specialization of the physics-informed neural network which is used as a solver for a
+standard `ODEProblem`.
 
 !!! warn
 
@@ -20,10 +22,11 @@ of the physics-informed neural network which is used as a solver for a standard
 
 ## Positional Arguments
 
-* `chain`: A neural network architecture, defined as a `Lux.AbstractExplicitLayer`.
-* `Kernel`: Choice of MCMC Sampling Algorithm. Defaults to `AdvancedHMC.HMC`
+* `chain`: A neural network architecture, defined as a `Lux.AbstractLuxLayer`.
+* `kernel`: Choice of MCMC Sampling Algorithm. Defaults to `AdvancedHMC.HMC`
 
 ## Keyword Arguments
+
 (refer `NeuralPDE.ahmc_bayesian_pinn_ode` keyword arguments.)
 
 ## Example
@@ -44,18 +47,15 @@ dataset = [x̂, time]
 
 chainlux = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh), Lux.Dense(6, 1))
 
-alg = BNNODE(chainlux, draw_samples = 2000,
-                       l2std = [0.05], phystd = [0.05],
-                       priorsNNw = (0.0, 3.0), progress = true)
+alg = BNNODE(chainlux; draw_samples = 2000, l2std = [0.05], phystd = [0.05],
+             priorsNNw = (0.0, 3.0), progress = true)
 
 sol_lux = solve(prob, alg)
 
 # with parameter estimation
-alg = BNNODE(chainlux,dataset = dataset,
-                draw_samples = 2000,l2std = [0.05],
-                phystd = [0.05],priorsNNw = (0.0, 10.0),
-                param = [Normal(6.5, 0.5), Normal(-3, 0.5)],
-                progress = true)
+alg = BNNODE(chainlux; dataset, draw_samples = 2000, l2std = [0.05], phystd = [0.05],
+             priorsNNw = (0.0, 10.0), param = [Normal(6.5, 0.5), Normal(-3, 0.5)],
+             progress = true)
 
 sol_lux_pestim = solve(prob, alg)
 ```
@@ -71,62 +71,49 @@ is an accurate interpolation (up to the neural network training result). In addi
 
 ## References
 
-Liu Yanga, Xuhui Menga, George Em Karniadakis. "B-PINNs: Bayesian Physics-Informed Neural Networks for
-Forward and Inverse PDE Problems with Noisy Data".
+Liu Yanga, Xuhui Menga, George Em Karniadakis. "B-PINNs: Bayesian Physics-Informed Neural
+Networks for Forward and Inverse PDE Problems with Noisy Data".
 
 Kevin Linka, Amelie Schäfer, Xuhui Meng, Zongren Zou, George Em Karniadakis, Ellen Kuhl
 "Bayesian Physics Informed Neural Networks for real-world nonlinear dynamical systems".
 """
-struct BNNODE{C, K, IT <: NamedTuple,
-    A <: NamedTuple, H <: NamedTuple,
-    ST <: Union{Nothing, AbstractTrainingStrategy},
-    I <: Union{Nothing, <:NamedTuple, Vector{<:AbstractFloat}},
-    P <: Union{Nothing, Vector{<:Distribution}},
-    D <:
-    Union{Vector{Nothing}, Vector{<:Vector{<:AbstractFloat}}}} <:
-       NeuralPDEAlgorithm
-    chain::C
-    Kernel::K
-    strategy::ST
-    draw_samples::Int64
+@concrete struct BNNODE <: NeuralPDEAlgorithm
+    chain <: AbstractLuxLayer
+    kernel
+    strategy <: Union{Nothing, AbstractTrainingStrategy}
+    draw_samples::Int
     priorsNNw::Tuple{Float64, Float64}
-    param::P
+    param <: Union{Nothing, Vector{<:Distribution}}
     l2std::Vector{Float64}
     phystd::Vector{Float64}
     phynewstd::Vector{Float64}
-    dataset::D
+    dataset <: Union{Vector{Nothing}, Vector{<:Vector{<:AbstractFloat}}}
     physdt::Float64
-    MCMCkwargs::H
-    nchains::Int64
-    init_params::I
-    Adaptorkwargs::A
-    Integratorkwargs::IT
-    numensemble::Int64
+    MCMCkwargs <: NamedTuple
+    nchains::Int
+    init_params <: Union{Nothing, <:NamedTuple, Vector{<:AbstractFloat}}
+    Adaptorkwargs <: NamedTuple
+    Integratorkwargs <: NamedTuple
+    numensemble::Int
     estim_collocate::Bool
     autodiff::Bool
     progress::Bool
     verbose::Bool
 end
-function BNNODE(chain, Kernel = HMC; strategy = nothing, draw_samples = 2000,
-        priorsNNw = (0.0, 2.0), param = nothing, l2std = [0.05], phystd = [0.05], phynewstd = [0.05],
-        dataset = [nothing], physdt = 1 / 20.0, MCMCkwargs = (n_leapfrog = 30,), nchains = 1,
-        init_params = nothing,
+
+function BNNODE(chain, kernel = HMC; strategy = nothing, draw_samples = 2000,
+        priorsNNw = (0.0, 2.0), param = nothing, l2std = [0.05], phystd = [0.05],
+        phynewstd = [0.05], dataset = [nothing], physdt = 1 / 20.0,
+        MCMCkwargs = (n_leapfrog = 30,), nchains = 1, init_params = nothing,
         Adaptorkwargs = (Adaptor = StanHMCAdaptor,
-            Metric = DiagEuclideanMetric,
-            targetacceptancerate = 0.8),
+            Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
         Integratorkwargs = (Integrator = Leapfrog,),
         numensemble = floor(Int, draw_samples / 3),
-        estim_collocate = false,
-        autodiff = false, progress = false, verbose = false)
-    !(chain isa Lux.AbstractExplicitLayer) &&
-        (chain = adapt(FromFluxAdaptor(false, false), chain))
-    BNNODE(chain, Kernel, strategy,
-        draw_samples, priorsNNw, param, l2std,
-        phystd, phynewstd, dataset, physdt, MCMCkwargs,
-        nchains, init_params,
-        Adaptorkwargs, Integratorkwargs,
-        numensemble, estim_collocate,
-        autodiff, progress, verbose)
+        estim_collocate = false, autodiff = false, progress = false, verbose = false)
+    chain isa AbstractLuxLayer || (chain = FromFluxAdaptor()(chain))
+    return BNNODE(chain, kernel, strategy, draw_samples, priorsNNw, param, l2std, phystd,
+        phynewstd, dataset, physdt, MCMCkwargs, nchains, init_params, Adaptorkwargs,
+        Integratorkwargs, numensemble, estim_collocate, autodiff, progress, verbose)
 end
 
 """
@@ -144,98 +131,59 @@ Contains `ahmc_bayesian_pinn_ode()` function output:
     - step_size
     - nom_step_size
 """
-struct BPINNstats{MC, S, ST}
-    mcmc_chain::MC
-    samples::S
-    statistics::ST
+@concrete struct BPINNstats
+    mcmc_chain
+    samples
+    statistics
 end
 
 """
-BPINN Solution contains the original solution from AdvancedHMC.jl sampling (BPINNstats contains fields related to that).
+BPINN Solution contains the original solution from AdvancedHMC.jl sampling (BPINNstats
+contains fields related to that).
 
-1. `ensemblesol` is the Probabilistic Estimate (MonteCarloMeasurements.jl Particles type) of Ensemble solution from All Neural Network's (made using all sampled parameters) output's.
+1. `ensemblesol` is the Probabilistic Estimate (MonteCarloMeasurements.jl Particles type) of
+   Ensemble solution from All Neural Network's (made using all sampled parameters) output's.
 2. `estimated_nn_params` - Probabilistic Estimate of NN params from sampled weights, biases.
-3. `estimated_de_params` - Probabilistic Estimate of DE params from sampled unknown DE parameters.
+3. `estimated_de_params` - Probabilistic Estimate of DE params from sampled unknown DE
+   parameters.
 """
-struct BPINNsolution{O <: BPINNstats, E, NP, OP, P}
-    original::O
-    ensemblesol::E
-    estimated_nn_params::NP
-    estimated_de_params::OP
-    timepoints::P
-
-    function BPINNsolution(original,
-            ensemblesol,
-            estimated_nn_params,
-            estimated_de_params,
-            timepoints)
-        new{typeof(original), typeof(ensemblesol), typeof(estimated_nn_params),
-            typeof(estimated_de_params), typeof(timepoints)}(
-            original, ensemblesol, estimated_nn_params,
-            estimated_de_params, timepoints)
-    end
+@concrete struct BPINNsolution
+    original <: BPINNstats
+    ensemblesol
+    estimated_nn_params
+    estimated_de_params
+    timepoints
 end
 
-function SciMLBase.__solve(prob::SciMLBase.ODEProblem,
-        alg::BNNODE,
-        args...;
-        dt = nothing,
-        timeseries_errors = true,
-        save_everystep = true,
-        adaptive = false,
-        abstol = 1.0f-6,
-        reltol = 1.0f-3,
-        verbose = false,
-        saveat = 1 / 50.0,
-        maxiters = nothing,)
-    @unpack chain, l2std, phystd, phynewstd, param, priorsNNw, Kernel, strategy,
-    draw_samples, dataset, init_params,
-    nchains, physdt, Adaptorkwargs, Integratorkwargs,
-    MCMCkwargs, numensemble, estim_collocate, autodiff, progress,
-    verbose = alg
+function SciMLBase.__solve(prob::SciMLBase.ODEProblem, alg::BNNODE, args...; dt = nothing,
+        timeseries_errors = true, save_everystep = true, adaptive = false,
+        abstol = 1.0f-6, reltol = 1.0f-3, verbose = false, saveat = 1 / 50.0,
+        maxiters = nothing)
+    (; chain, param, strategy, draw_samples, numensemble, verbose) = alg
 
     # ahmc_bayesian_pinn_ode needs param=[] for easier vcat operation for full vector of parameters
     param = param === nothing ? [] : param
     strategy = strategy === nothing ? GridTraining : strategy
 
-    if draw_samples < 0
-        throw(error("Number of samples to be drawn has to be >=0."))
-    end
+    @assert alg.draw_samples≥0 "Number of samples to be drawn has to be >=0."
 
-    mcmcchain, samples, statistics = ahmc_bayesian_pinn_ode(prob, chain,
-        strategy = strategy, dataset = dataset,
-        draw_samples = draw_samples,
-        init_params = init_params,
-        physdt = physdt, phynewstd = phynewstd,
-        l2std = l2std,
-        phystd = phystd,
-        priorsNNw = priorsNNw,
-        param = param,
-        nchains = nchains,
-        autodiff = autodiff,
-        Kernel = Kernel,
-        Adaptorkwargs = Adaptorkwargs,
-        Integratorkwargs = Integratorkwargs,
-        MCMCkwargs = MCMCkwargs,
-        progress = progress,
-        verbose = verbose,
-        estim_collocate = estim_collocate)
+    mcmcchain, samples, statistics = ahmc_bayesian_pinn_ode(
+        prob, chain; strategy, alg.dataset, alg.draw_samples, alg.init_params,
+        alg.physdt, alg.l2std, alg.phystd, alg.phynewstd, alg.priorsNNw, param, alg.nchains,
+        alg.autodiff, Kernel = alg.kernel, alg.Adaptorkwargs, alg.Integratorkwargs,
+        alg.MCMCkwargs, alg.progress, alg.verbose, alg.estim_collocate)
 
     fullsolution = BPINNstats(mcmcchain, samples, statistics)
     ninv = length(param)
     t = collect(eltype(saveat), prob.tspan[1]:saveat:prob.tspan[2])
 
-    if chain isa Lux.AbstractExplicitLayer
-        θinit, st = Lux.setup(Random.default_rng(), chain)
-        θ = [vector_to_parameters(samples[i][1:(end - ninv)], θinit)
-             for i in 1:max(draw_samples - draw_samples ÷ 10, draw_samples - 1000)]
+    θinit, st = LuxCore.setup(Random.default_rng(), chain)
+    θ = [vector_to_parameters(samples[i][1:(end - ninv)], θinit)
+         for i in 1:max(draw_samples - draw_samples ÷ 10, draw_samples - 1000)]
 
-        luxar = [chain(t', θ[i], st)[1] for i in 1:numensemble]
-        # only need for size
-        θinit = collect(ComponentArrays.ComponentArray(θinit))
-    else
-        throw(error("Only Lux.AbstractExplicitLayer neural networks are supported"))
-    end
+    luxar = [chain(t', θ[i], st)[1] for i in 1:numensemble]
+    # only need for size
+    θinit = collect(ComponentArray(θinit))
 
     # constructing ensemble predictions
     ensemblecurves = Vector{}[]
@@ -278,5 +226,5 @@ function SciMLBase.__solve(prob::SciMLBase.ODEProblem,
                             for i in (nnparams + 1):(nnparams + ninv)]
     end
 
-    BPINNsolution(fullsolution, ensemblecurves, estimnnparams, estimated_params, t)
-end
+    return BPINNsolution(fullsolution, ensemblecurves, estimnnparams, estimated_params, t)
+end
\ No newline at end of file

From f5eca91691131e0f49f409cfb7362d4cab41b0dd Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Fri, 18 Oct 2024 21:55:11 +0530
Subject: [PATCH 058/107] managing conflicts 2

---
 src/PDE_BPINN.jl        | 441 ++++++++++++-----------------
 src/advancedHMC_MCMC.jl | 603 +++++++++++++++-------------------------
 src/discretize.jl       | 434 ++++++++++-------------------
 3 files changed, 564 insertions(+), 914 deletions(-)

diff --git a/src/PDE_BPINN.jl b/src/PDE_BPINN.jl
index 8957df889b..4f7e51b3a0 100644
--- a/src/PDE_BPINN.jl
+++ b/src/PDE_BPINN.jl
@@ -1,70 +1,27 @@
-mutable struct PDELogTargetDensity{
-    ST <: AbstractTrainingStrategy,
-    D <: Union{Nothing, Vector{<:Matrix{<:Real}}},
-    P <: Vector{<:Distribution},
-    I,
-    F,
-    FF,
-    PH
-}
-    dim::Int64
-    strategy::ST
-    dataset::D
-    priors::P
+@concrete struct PDELogTargetDensity
+    dim::Int
+    strategy <: AbstractTrainingStrategy
+    dataset <: Union{Nothing, Vector{<:Matrix{<:Real}}}
+    priors <: Vector{<:Distribution}
     allstd::Vector{Vector{Float64}}
+    phynewstd::Vector{Float64}
     names::Tuple
     extraparams::Int
-    init_params::I
-    full_loglikelihood::F
-    L2_loss2::FF
-    Φ::PH
-
-    function PDELogTargetDensity(dim, strategy, dataset,
-            priors, allstd, names, extraparams,
-            init_params::AbstractVector, full_loglikelihood, L2_loss2, Φ)
-        new{
-            typeof(strategy),
-            typeof(dataset),
-            typeof(priors),
-            typeof(init_params),
-            typeof(full_loglikelihood),
-            typeof(L2_loss2),
-            typeof(Φ)
-        }(dim,
-            strategy,
-            dataset,
-            priors,
-            allstd,
-            names,
-            extraparams,
-            init_params,
-            full_loglikelihood,
-            L2_loss2,
-            Φ)
-    end
-    function PDELogTargetDensity(dim, strategy, dataset,
-            priors, allstd, names, extraparams,
-            init_params::Union{NamedTuple, ComponentArrays.ComponentVector},
-            full_loglikelihood, L2_loss2, Φ)
-        new{
-            typeof(strategy),
-            typeof(dataset),
-            typeof(priors),
-            typeof(init_params),
-            typeof(full_loglikelihood),
-            typeof(L2_loss2),
-            typeof(Φ)
-        }(dim,
-            strategy,
-            dataset,
-            priors,
-            allstd,
-            names,
-            extraparams,
-            init_params,
-            full_loglikelihood,
-            L2_loss2,
-            Φ)
+    init_params <: Union{AbstractVector, NamedTuple, ComponentArray}
+    full_loglikelihood::Any
+    L2_loss2::Any
+    Φ::Any
+end
+
+function LogDensityProblems.logdensity(ltd::PDELogTargetDensity, θ)
+    # for parameter estimation neccesarry to use multioutput case
+    if Tar.L2_loss2 === nothing
+        return Tar.full_loglikelihood(setparameters(Tar, θ), Tar.allstd) +
+               priorlogpdf(Tar, θ) + L2LossData(Tar, θ)
+    else
+        return Tar.full_loglikelihood(setparameters(Tar, θ), Tar.allstd) +
+               priorlogpdf(Tar, θ) + L2LossData(Tar, θ) +
+               Tar.L2_loss2(setparameters(Tar, θ), Tar.phynewstd)
     end
 end
 
@@ -87,7 +44,7 @@ function get_lossy(pinnrep, dataset, Dict_differentials)
     # for each dataset point(eq_sub dictionary), substitute in masked equations
     # n_collocated_equations = n_rows_dataset(or n_indvar_coords_dataset)
     masked_colloc_equations = [[substitute(eq, eq_sub) for eq in eqs_new]
-                            for eq_sub in eq_subs]
+                               for eq_sub in eq_subs]
     # now we have vector of dataset depvar's collocated equations
 
     # reverse dict for re-substituting values of Differential(t)(u(t)) etc
@@ -95,15 +52,17 @@ function get_lossy(pinnrep, dataset, Dict_differentials)
 
     # unmask Differential terms in masked_colloc_equations
     colloc_equations = [substitute.(masked_colloc_equation, Ref(rev_Dict_differentials))
-                    for masked_colloc_equation in masked_colloc_equations]
+                        for masked_colloc_equation in masked_colloc_equations]
 
     # nested vector of datafree_pde_loss_functions (as in discretize.jl)
     # each sub vector has dataset's indvar coord's datafree_colloc_loss_function, n_subvectors = n_rows_dataset(or n_indvar_coords_dataset)
     # zip each colloc equation with args for each build_loss call per equation vector
     datafree_colloc_loss_functions = [[build_loss_function(pinnrep, eq, pde_indvar)
-                       for (eq, pde_indvar, integration_indvar) in zip(colloc_equation,
-        pinnrep.pde_indvars,
-        pinnrep.pde_integration_vars)] for colloc_equation in colloc_equations]
+                                       for (eq, pde_indvar, integration_indvar) in zip(
+                                          colloc_equation,
+                                          pinnrep.pde_indvars,
+                                          pinnrep.pde_integration_vars)]
+                                      for colloc_equation in colloc_equations]
 
     return datafree_colloc_loss_functions
 end
@@ -132,22 +91,10 @@ function get_symbols(dataset, depvars, eqs)
     return to_subs, tobe_subs
 end
 
-function LogDensityProblems.logdensity(Tar::PDELogTargetDensity, θ)
-    # for parameter estimation neccesarry to use multioutput case
-    if Tar.L2_loss2 === nothing
-        return Tar.full_loglikelihood(setparameters(Tar, θ), Tar.allstd) +
-               priorlogpdf(Tar, θ) + L2LossData(Tar, θ)
-    else
-        return Tar.full_loglikelihood(setparameters(Tar, θ), Tar.allstd) +
-               priorlogpdf(Tar, θ) + L2LossData(Tar, θ) +
-               Tar.L2_loss2(setparameters(Tar, θ), Tar.allstd)
-    end
-end
-
-function setparameters(Tar::PDELogTargetDensity, θ)
-    names = Tar.names
-    ps_new = θ[1:(end - Tar.extraparams)]
-    ps = Tar.init_params
+@views function setparameters(ltd::PDELogTargetDensity, θ)
+    names = ltd.names
+    ps_new = θ[1:(end - ltd.extraparams)]
+    ps = ltd.init_params
 
     # multioutput case for Lux chains, for each depvar ps would contain Lux ComponentVectors
     # which we use for mapping current ahmc sampled vector of parameters onto NNs
@@ -155,83 +102,89 @@ function setparameters(Tar::PDELogTargetDensity, θ)
     Luxparams = [vector_to_parameters(ps_new[((i += length(ps[x])) - length(ps[x]) + 1):i],
                      ps[x]) for x in names]
 
-    a = ComponentArrays.ComponentArray(NamedTuple{Tar.names}(i for i in Luxparams))
+    a = ComponentArray(NamedTuple{ltd.names}(i for i in Luxparams))
 
-    if Tar.extraparams > 0
-        b = θ[(end - Tar.extraparams + 1):end]
-        return ComponentArrays.ComponentArray(;
-            depvar = a,
-            p = b)
+    if ltd.extraparams > 0
+        return ComponentArray(; depvar = a, p = θ[(end - ltd.extraparams + 1):end])
     else
-        return ComponentArrays.ComponentArray(;
-            depvar = a)
+        return ComponentArray(; depvar = a)
     end
 end
 
-LogDensityProblems.dimension(Tar::PDELogTargetDensity) = Tar.dim
+LogDensityProblems.dimension(ltd::PDELogTargetDensity) = ltd.dim
 
 function LogDensityProblems.capabilities(::PDELogTargetDensity)
     LogDensityProblems.LogDensityOrder{1}()
 end
 
 # L2 losses loglikelihood(needed mainly for ODE parameter estimation)
-function L2LossData(Tar::PDELogTargetDensity, θ)
-    Φ = Tar.Φ
-    init_params = Tar.init_params
-    dataset = Tar.dataset
-    sumt = 0
-    L2stds = Tar.allstd[3]
+function L2LossData(ltd::PDELogTargetDensity, θ)
+    Φ = ltd.Φ
+    init_params = ltd.init_params
+    dataset = ltd.dataset
+    L2stds = ltd.allstd[3]
     # each dep var has a diff dataset depending on its indep var and their domains
     # these datasets are matrices of first col-dep var and remaining cols-all indep var
-    # Tar.init_params is needed to construct a vector of parameters into a ComponentVector
+    # ltd.init_params is needed to construct a vector of parameters into a ComponentVector
 
     # dataset of form Vector[matrix_x, matrix_y, matrix_z]
     # matrix_i is of form [i,indvar1,indvar2,..] (needed in case if heterogenous domains)
-    # note that indvar1,indvar2.. cols can be different values for different depvar matrices
-    # dataset,phi order follows pinnrep.depvars orders of variables (order of declaration in @variables macro)
 
     # Phi is the trial solution for each NN in chain array
     # Creating logpdf( MvNormal(Phi(t,θ),std), dataset[i] )
-    # dataset[i][:, 2:end] -> indepvar cols of a particular depvar's dataset 
+    # dataset[i][:, 2:end] -> indepvar cols of a particular depvar's dataset
     # dataset[i][:, 1] -> depvar col of depvar's dataset
 
-    if Tar.extraparams > 0
-        for i in eachindex(Φ)
-            sumt += logpdf(
-                MvNormal(
-                    Φ[i](dataset[i][:, 2:end]',
-                        vector_to_parameters(θ[1:(end - Tar.extraparams)],
-                            init_params)[Tar.names[i]])[1,
-                        :],
-                    LinearAlgebra.Diagonal(abs2.(ones(size(dataset[i])[1]) .*
-                                                 L2stds[i]))),
-                dataset[i][:, 1])
-        end
-        return sumt
+    ltd.extraparams ≤ 0 && return false
+
+    sumt = 0
+    for i in eachindex(Φ)
+        sumt += logpdf(
+            MvNormal(
+                Φ[i](dataset[i][:, 2:end]',
+                    vector_to_parameters(θ[1:(end - ltd.extraparams)], init_params)[ltd.names[i]])[
+                    1, :],
+                Diagonal(abs2.(ones(size(dataset[i])[1]) .* L2stds[i]))),
+            dataset[i][:, 1])
     end
-    return 0
+    return sumt
 end
 
 # priors for NN parameters + ODE constants
-function priorlogpdf(Tar::PDELogTargetDensity, θ)
-    allparams = Tar.priors
+function priorlogpdf(ltd::PDELogTargetDensity, θ)
+    allparams = ltd.priors
     # Vector of ode parameters priors
     invpriors = allparams[2:end]
-
-    # nn weights
     nnwparams = allparams[1]
 
-    if Tar.extraparams > 0
-        invlogpdf = sum(
-            logpdf(invpriors[length(θ) - i + 1], θ[i])
-            for i in (length(θ) - Tar.extraparams + 1):length(θ);
-            init = 0.0)
+    ltd.extraparams ≤ 0 && return logpdf(nnwparams, θ)
 
-        return (invlogpdf
-                +
-                logpdf(nnwparams, θ[1:(length(θ) - Tar.extraparams)]))
+    invlogpdf = sum((length(θ) - ltd.extraparams + 1):length(θ)) do i
+        logpdf(invpriors[length(θ) - i + 1], θ[i])
+    end
+
+    return invlogpdf + logpdf(nnwparams, θ[1:(length(θ) - ltd.extraparams)])
+end
+
+function integratorchoice(Integratorkwargs, initial_ϵ)
+    Integrator = Integratorkwargs[:Integrator]
+    if Integrator == JitteredLeapfrog
+        jitter_rate = Integratorkwargs[:jitter_rate]
+        Integrator(initial_ϵ, jitter_rate)
+    elseif Integrator == TemperedLeapfrog
+        tempering_rate = Integratorkwargs[:tempering_rate]
+        Integrator(initial_ϵ, tempering_rate)
+    else
+        Integrator(initial_ϵ)
+    end
+end
+
+function adaptorchoice(Adaptor, mma, ssa)
+    if Adaptor != AdvancedHMC.NoAdaptation()
+        Adaptor(mma, ssa)
+    else
+        AdvancedHMC.NoAdaptation()
     end
-    return logpdf(nnwparams, θ)
 end
 
 function inference(samples, pinnrep, saveats, numensemble, ℓπ)
@@ -298,82 +251,69 @@ function inference(samples, pinnrep, saveats, numensemble, ℓπ)
     return ensemblecurves, estimatedLuxparams, estimated_params, timepoints
 end
 
-function integratorchoice(Integratorkwargs, initial_ϵ)
-    Integrator = Integratorkwargs[:Integrator]
-    if Integrator == JitteredLeapfrog
-        jitter_rate = Integratorkwargs[:jitter_rate]
-        Integrator(initial_ϵ, jitter_rate)
-    elseif Integrator == TemperedLeapfrog
-        tempering_rate = Integratorkwargs[:tempering_rate]
-        Integrator(initial_ϵ, tempering_rate)
-    else
-        Integrator(initial_ϵ)
-    end
-end
-
-function adaptorchoice(Adaptor, mma, ssa)
-    if Adaptor != AdvancedHMC.NoAdaptation()
-        Adaptor(mma, ssa)
-    else
-        AdvancedHMC.NoAdaptation()
-    end
-end
-
 """
     ahmc_bayesian_pinn_pde(pde_system, discretization;
-            draw_samples = 1000,
-            bcstd = [0.01], l2std = [0.05],
-            phystd = [0.05], priorsNNw = (0.0, 2.0),
-            param = [], nchains = 1, Kernel = HMC(0.1, 30),
-            Adaptorkwargs = (Adaptor = StanHMCAdaptor,
-                Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
-            Integratorkwargs = (Integrator = Leapfrog,), saveats = [1 / 10.0],
-            numensemble = floor(Int, draw_samples / 3), progress = false, verbose = false)               
+        draw_samples = 1000, bcstd = [0.01], l2std = [0.05], phystd = [0.05],
+        priorsNNw = (0.0, 2.0), param = [], nchains = 1, Kernel = HMC(0.1, 30),
+        Adaptorkwargs = (Adaptor = StanHMCAdaptor,
+            Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
+        Integratorkwargs = (Integrator = Leapfrog,), saveats = [1 / 10.0],
+        numensemble = floor(Int, draw_samples / 3), progress = false, verbose = false)
 
 ## NOTES
 
 * Dataset is required for accurate Parameter estimation + solving equations.
-* Returned solution is a BPINNsolution consisting of Ensemble solution, estimated PDE and NN parameters
-  for chosen `saveats` grid spacing and last n = `numensemble` samples in Chain. the complete set of samples
-  in the MCMC chain is returned as `fullsolution`,  refer `BPINNsolution` for more details.
+* Returned solution is a BPINNsolution consisting of Ensemble solution, estimated PDE and NN
+  parameters for chosen `saveats` grid spacing and last n = `numensemble` samples in Chain.
+  the complete set of samples in the MCMC chain is returned as `fullsolution`,  refer
+  `BPINNsolution` for more details.
 
 ## Positional Arguments
 
 * `pde_system`: ModelingToolkit defined PDE equation or system of equations.
-* `discretization`: BayesianPINN discretization for the given pde_system, Neural Network and training strategy.
+* `discretization`: BayesianPINN discretization for the given pde_system, Neural Network and
+  training strategy.
 
 ## Keyword Arguments
 
-* `draw_samples`: number of samples to be drawn in the MCMC algorithms (warmup samples are ~2/3 of draw samples)
-* `bcstd`: Vector of standard deviations of BPINN prediction against Initial/Boundary Condition equations.
-* `l2std`: Vector of standard deviations of BPINN prediction against L2 losses/Dataset for each dependant variable of interest.
-* `phystd`: Vector of standard deviations of BPINN prediction against Chosen Underlying PDE equations.
-* `priorsNNw`: Tuple of (mean, std) for BPINN Network parameters. Weights and Biases of BPINN are Normal Distributions by default.
+* `draw_samples`: number of samples to be drawn in the MCMC algorithms (warmup samples are
+  ~2/3 of draw samples)
+* `bcstd`: Vector of standard deviations of BPINN prediction against Initial/Boundary
+  Condition equations.
+* `l2std`: Vector of standard deviations of BPINN prediction against L2 losses/Dataset for
+  each dependant variable of interest.
+* `phystd`: Vector of standard deviations of BPINN prediction against Chosen Underlying PDE
+  equations.
+* `priorsNNw`: Tuple of (mean, std) for BPINN Network parameters. Weights and Biases of
+  BPINN are Normal Distributions by default.
 * `param`: Vector of chosen PDE's parameter's Distributions in case of Inverse problems.
 * `nchains`: number of chains you want to sample.
-* `Kernel`: Choice of MCMC Sampling Algorithm object HMC/NUTS/HMCDA (AdvancedHMC.jl implementations).
-* `Adaptorkwargs`: `Adaptor`, `Metric`, `targetacceptancerate`. Refer: https://turinglang.org/AdvancedHMC.jl/stable/
-   Note: Target percentage(in decimal) of iterations in which the proposals are accepted (0.8 by default).
-* `Integratorkwargs`: `Integrator`, `jitter_rate`, `tempering_rate`. Refer: https://turinglang.org/AdvancedHMC.jl/stable/
-* `saveats`: Grid spacing for each independent variable for evaluation of ensemble solution, estimated parameters.
-* `numensemble`: Number of last samples to take for creation of ensemble solution, estimated parameters.
+* `Kernel`: Choice of MCMC Sampling Algorithm object HMC/NUTS/HMCDA (AdvancedHMC.jl
+  implementations).
+* `Adaptorkwargs`: `Adaptor`, `Metric`, `targetacceptancerate`. Refer:
+  https://turinglang.org/AdvancedHMC.jl/stable/. Note: Target percentage(in decimal) of
+  iterations in which the proposals are accepted (0.8 by default).
+* `Integratorkwargs`: `Integrator`, `jitter_rate`, `tempering_rate`. Refer:
+  https://turinglang.org/AdvancedHMC.jl/stable/
+* `saveats`: Grid spacing for each independent variable for evaluation of ensemble solution,
+  estimated parameters.
+* `numensemble`: Number of last samples to take for creation of ensemble solution, estimated
+  parameters.
 * `progress`: controls whether to show the progress meter or not.
 * `verbose`: controls the verbosity. (Sample call args in AHMC).
 
-## Warnings
+!!! warning
 
-* AdvancedHMC.jl is still developing convenience structs so might need changes on new releases.
+    AdvancedHMC.jl is still developing convenience structs so might need changes on new
+    releases.
 """
 function ahmc_bayesian_pinn_pde(pde_system, discretization;
-        draw_samples = 1000,
-        bcstd = [0.01], l2std = [0.05],
-        phystd = [0.05], phystdnew = [0.05], priorsNNw = (0.0, 2.0),
-        param = [], nchains = 1, Kernel = HMC(0.1, 30),
-        Adaptorkwargs = (Adaptor = StanHMCAdaptor,
+        draw_samples = 1000, bcstd = [0.01], l2std = [0.05], phystd = [0.05],
+        phynewstd = [0.05], priorsNNw = (0.0, 2.0), param = [], nchains = 1,
+        Kernel = HMC(0.1, 30), Adaptorkwargs = (Adaptor = StanHMCAdaptor,
             Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
         Integratorkwargs = (Integrator = Leapfrog,), saveats = [1 / 10.0],
-        numensemble = floor(Int, draw_samples / 3), Dict_differentials = nothing,
-        progress = false, verbose = false)
+        numensemble = floor(Int, draw_samples / 3), progress = false, verbose = false)
     pinnrep = symbolic_discretize(pde_system, discretization)
     dataset_pde, dataset_bc = discretization.dataset
 
@@ -385,31 +325,31 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
         # add case for if parameters present in bcs?
 
         train_sets_pde = get_dataset_train_points(pde_system.eqs,
-                dataset_pde,
-                pinnrep)
-        colloc_train_sets = [[hcat(train_sets_pde[i][:, j]...)' for i in eachindex(datafree_colloc_loss_functions[1])] for j in eachindex(datafree_colloc_loss_functions)]
+            dataset_pde,
+            pinnrep)
+        colloc_train_sets = [[hcat(train_sets_pde[i][:, j]...)'
+                              for i in eachindex(datafree_colloc_loss_functions[1])]
+                             for j in eachindex(datafree_colloc_loss_functions)]
 
         # for each datafree_colloc_loss_function create loss_functions by passing dataset's indvar coords as train_sets_pde.
         # placeholder strategy = GridTraining(0.1), datafree_bc_loss_function and train_sets_bc must be nothing
         # order of indvar coords will be same as corresponding depvar coords values in dataset provided in get_lossy() call.
         pde_loss_function_points = [merge_strategy_with_loglikelihood_function(
-            pinnrep,
-            GridTraining(0.1),
-            datafree_colloc_loss_functions[i],
-            nothing;
-            train_sets_pde = colloc_train_sets[i],
-            train_sets_bc = nothing)[1]
-                              for i in eachindex(datafree_colloc_loss_functions)]
-
-        function L2_loss2(θ, allstd)
-            stdpdesnew = allstd[4]
-
+                                        pinnrep,
+                                        GridTraining(0.1),
+                                        datafree_colloc_loss_functions[i],
+                                        nothing;
+                                        train_sets_pde = colloc_train_sets[i],
+                                        train_sets_bc = nothing)[1]
+                                    for i in eachindex(datafree_colloc_loss_functions)]
+
+        function L2_loss2(θ, phynewstd)
             # first vector of losses,from tuple -> pde losses, first[1] pde loss
-            pde_loglikelihoods = [sum([pde_loss_function(θ, stdpdesnew[i])
+            pde_loglikelihoods = [sum([pde_loss_function(θ, phynewstd[i])
                                        for (i, pde_loss_function) in enumerate(pde_loss_functions)])
                                   for pde_loss_functions in pde_loss_function_points]
 
-            # bc_loglikelihoods = [sum([bc_loss_function(θ, stdpdesnew[i]) for (i, bc_loss_function) in enumerate(pde_loss_function_points[1])]) for pde_loss_function_points in pde_loss_functions]
+            # bc_loglikelihoods = [sum([bc_loss_function(θ, phynewstd[i]) for (i, bc_loss_function) in enumerate(pde_loss_function_points[1])]) for pde_loss_function_points in pde_loss_functions]
             #                      for (j, bc_loss_function) in enumerate(bc_loss_functions)]
 
             return sum(pde_loglikelihoods)
@@ -432,7 +372,7 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
     elseif discretization.param_estim && dataset isa Nothing
         throw(UndefVarError(:dataset))
     elseif discretization.param_estim && length(l2std) != length(pinnrep.depvars)
-        throw(error("L2 stds length must match number of dependant variables"))
+        error("L2 stds length must match number of dependant variables")
     end
 
     # for physics loglikelihood
@@ -440,15 +380,13 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
     chain = discretization.chain
 
     if length(pinnrep.domains) != length(saveats)
-        throw(error("Number of independent variables must match saveat inference discretization steps"))
+        error("Number of independent variables must match saveat inference discretization steps")
     end
 
     # NN solutions for loglikelihood which is used for L2lossdata
     Φ = pinnrep.phi
 
-    if nchains < 1
-        throw(error("number of chains must be greater than or equal to 1"))
-    end
+    @assert nchains≥1 "number of chains must be greater than or equal to 1"
 
     # remove inv params take only NN params, AHMC uses Float64
     initial_nnθ = pinnrep.flat_init_params[1:(end - length(param))]
@@ -465,7 +403,7 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
     # add init_params for NN params
     priors = [
         MvNormal(priorsNNw[1] * ones(nparameters),
-        LinearAlgebra.Diagonal(abs2.(priorsNNw[2] .* ones(nparameters))))
+        Diagonal(abs2.(priorsNNw[2] .* ones(nparameters))))
     ]
 
     # append Ode params to all paramvector - initial_θ
@@ -480,18 +418,10 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
     # vector in case of N-dimensional domains
     strategy = discretization.strategy
 
-    # dimensions would be total no of params,initial_nnθ for Lux namedTuples 
-    ℓπ = PDELogTargetDensity(nparameters,
-        strategy,
-        dataset,
-        priors,
-        [phystd, bcstd, l2std, phystdnew],
-        names,
-        ninv,
-        initial_nnθ,
-        full_weighted_loglikelihood,
-        newloss,
-        Φ)
+    # dimensions would be total no of params,initial_nnθ for Lux namedTuples
+    ℓπ = PDELogTargetDensity(
+        nparameters, strategy, dataset, priors, [phystd, bcstd, l2std], phynewstd,
+        names, ninv, initial_nnθ, full_weighted_loglikelihood, newloss, Φ)
 
     Adaptor, Metric, targetacceptancerate = Adaptorkwargs[:Adaptor],
     Adaptorkwargs[:Metric], Adaptorkwargs[:targetacceptancerate]
@@ -500,19 +430,22 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
     metric = Metric(nparameters)
     hamiltonian = Hamiltonian(metric, ℓπ, ForwardDiff)
 
-    @info("Current Physics Log-likelihood : ",
-        ℓπ.full_loglikelihood(setparameters(ℓπ, initial_θ),
-            ℓπ.allstd))
-    @info("Current Prior Log-likelihood : ", priorlogpdf(ℓπ, initial_θ))
-    @info("Current MSE against dataset Log-likelihood : ", L2LossData(ℓπ, initial_θ))
-    if !(newloss isa Nothing)
-        @info("Current L2_LOSSY : ",
-            ℓπ.L2_loss2(setparameters(ℓπ, initial_θ),
-                ℓπ.allstd))
+    if verbose
+        @printf("Current Physics Log-likelihood : %g\n",
+            ℓπ.full_loglikelihood(setparameters(ℓπ, initial_θ), ℓπ.allstd))
+        @printf("Current Prior Log-likelihood : %g\n", priorlogpdf(ℓπ, initial_θ))
+        @printf("Current MSE against dataset Log-likelihood : %g\n",
+            L2LossData(ℓπ, initial_θ))
+        if !(newloss isa Nothing)
+            @printf("Current new loss : %g\n",
+                ℓπ.L2_loss2(setparameters(ℓπ, initial_θ),
+                    ℓπ.phynewstd))
+        end
     end
 
     # parallel sampling option
     if nchains != 1
+
         # Cache to store the chains
         bpinnsols = Vector{Any}(undef, nchains)
 
@@ -534,17 +467,10 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
 
             fullsolution = BPINNstats(mcmc_chain, samples, stats)
             ensemblecurves, estimnnparams, estimated_params, timepoints = inference(
-                samples,
-                pinnrep,
-                saveat,
-                numensemble,
-                ℓπ)
-
-            bpinnsols[i] = BPINNsolution(fullsolution,
-                ensemblecurves,
-                estimnnparams,
-                estimated_params,
-                timepoints)
+                samples, pinnrep, saveat, numensemble, ℓπ)
+
+            bpinnsols[i] = BPINNsolution(
+                fullsolution, ensemblecurves, estimnnparams, estimated_params, timepoints)
         end
         return bpinnsols
     else
@@ -561,30 +487,25 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
         matrix_samples = hcat(samples...)
         mcmc_chain = MCMCChains.Chains(matrix_samples')
 
-        @info("Sampling Complete.")
-        @info("Final Physics Log-likelihood : ",
-            ℓπ.full_loglikelihood(setparameters(ℓπ, samples[end]),
-                ℓπ.allstd))
-        @info("Final Prior Log-likelihood : ", priorlogpdf(ℓπ, samples[end]))
-        @info("Final MSE against dataset Log-likelihood : ",
-            L2LossData(ℓπ, samples[end]))
-        if !(newloss isa Nothing)
-            @info("Final L2_LOSSY : ",
-                ℓπ.L2_loss2(setparameters(ℓπ, samples[end]),
-                    ℓπ.allstd))
+        if verbose
+            @printf("Sampling Complete.\n")
+            @printf("Final Physics Log-likelihood : %g\n",
+                ℓπ.full_loglikelihood(setparameters(ℓπ, samples[end]), ℓπ.allstd))
+            @printf("Final Prior Log-likelihood : %g\n", priorlogpdf(ℓπ, samples[end]))
+            @printf("Final MSE against dataset Log-likelihood : %g\n",
+                L2LossData(ℓπ, samples[end]))
+            if !(newloss isa Nothing)
+                @printf("Final L2_LOSSY : %g\n",
+                    ℓπ.L2_loss2(setparameters(ℓπ, samples[end]),
+                        ℓπ.phynewstd))
+            end
         end
 
         fullsolution = BPINNstats(mcmc_chain, samples, stats)
         ensemblecurves, estimnnparams, estimated_params, timepoints = inference(samples,
-            pinnrep,
-            saveats,
-            numensemble,
-            ℓπ)
-
-        return BPINNsolution(fullsolution,
-            ensemblecurves,
-            estimnnparams,
-            estimated_params,
-            timepoints)
+            pinnrep, saveats, numensemble, ℓπ)
+
+        return BPINNsolution(
+            fullsolution, ensemblecurves, estimnnparams, estimated_params, timepoints)
     end
-end
+end
\ No newline at end of file
diff --git a/src/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl
index 8b996fce5c..5ac4213c92 100644
--- a/src/advancedHMC_MCMC.jl
+++ b/src/advancedHMC_MCMC.jl
@@ -1,72 +1,42 @@
-mutable struct LogTargetDensity{C, S, ST <: AbstractTrainingStrategy, I,
-    P <: Vector{<:Distribution},
-    D <:
-    Union{Vector{Nothing}, Vector{<:Vector{<:AbstractFloat}}}
-}
+@concrete struct LogTargetDensity
     dim::Int
-    prob::SciMLBase.ODEProblem
-    chain::C
-    st::S
-    strategy::ST
-    dataset::D
-    priors::P
+    prob <: SciMLBase.ODEProblem
+    smodel <: StatefulLuxLayer
+    strategy <: AbstractTrainingStrategy
+    dataset <: Union{Vector{Nothing}, Vector{<:Vector{<:AbstractFloat}}}
+    priors <: Vector{<:Distribution}
     phystd::Vector{Float64}
     phynewstd::Vector{Float64}
     l2std::Vector{Float64}
     autodiff::Bool
     physdt::Float64
     extraparams::Int
-    init_params::I
+    init_params <: Union{NamedTuple, ComponentArray}
     estim_collocate::Bool
+end
 
-    function LogTargetDensity(dim, prob, chain::Optimisers.Restructure, st, strategy,
-            dataset,
-            priors, phystd, phynewstd, l2std, autodiff, physdt, extraparams,
-            init_params::AbstractVector, estim_collocate)
-        new{
-            typeof(chain),
-            Nothing,
-            typeof(strategy),
-            typeof(init_params),
-            typeof(priors),
-            typeof(dataset)
-        }(dim,
-            prob,
-            chain,
-            nothing, strategy,
-            dataset,
-            priors,
-            phystd,
-            phynewstd,
-            l2std,
-            autodiff,
-            physdt,
-            extraparams,
-            init_params,
-            estim_collocate)
-    end
-    function LogTargetDensity(dim, prob, chain::Lux.AbstractExplicitLayer, st, strategy,
-            dataset,
-            priors, phystd, phynewstd, l2std, autodiff, physdt, extraparams,
-            init_params::NamedTuple, estim_collocate)
-        new{
-            typeof(chain),
-            typeof(st),
-            typeof(strategy),
-            typeof(init_params),
-            typeof(priors),
-            typeof(dataset)
-        }(dim,
-            prob,
-            chain, st, strategy,
-            dataset, priors,
-            phystd, phynewstd,
-             l2std,
-            autodiff,
-            physdt,
-            extraparams,
-            init_params,
-            estim_collocate)
+"""
+NN OUTPUT AT t,θ ~ phi(t,θ).
+"""
+function (f::LogTargetDensity)(t::AbstractVector, θ)
+    θ = vector_to_parameters(θ, f.init_params)
+    dev = safe_get_device(θ)
+    t = safe_expand(dev, t)
+    u0 = f.prob.u0 |> dev
+    return u0 .+ (t' .- f.prob.tspan[1]) .* f.smodel(t', θ)
+end
+
+(f::LogTargetDensity)(t::Number, θ) = f([t], θ)[:, 1]
+
+"""
+Similar to ode_dfdx() in NNODE.
+"""
+function ode_dfdx(phi::LogTargetDensity, t::AbstractVector, θ, autodiff::Bool)
+    if autodiff
+        return ForwardDiff.jacobian(Base.Fix2(phi, θ), t)
+    else
+        ϵ = sqrt(eps(eltype(t)))
+        return (phi(t .+ ϵ, θ) .- phi(t, θ)) ./ ϵ
     end
 end
 
@@ -74,344 +44,239 @@ end
 Function needed for converting vector of sampled parameters into ComponentVector in case of Lux chain output, derivatives
 the sampled parameters are of exotic type `Dual` due to ForwardDiff's autodiff tagging.
 """
-function vector_to_parameters(ps_new::AbstractVector,
-        ps::Union{NamedTuple, ComponentArrays.ComponentVector})
-    @assert length(ps_new) == Lux.parameterlength(ps)
+function vector_to_parameters(ps_new::AbstractVector, ps::Union{NamedTuple, ComponentArray})
+    @assert length(ps_new) == LuxCore.parameterlength(ps)
     i = 1
     function get_ps(x)
         z = reshape(view(ps_new, i:(i + length(x) - 1)), size(x))
         i += length(x)
         return z
     end
-    return Functors.fmap(get_ps, ps)
+    return fmap(get_ps, ps)
 end
 
-vector_to_parameters(ps_new::AbstractVector, ps::AbstractVector) = ps_new
+vector_to_parameters(ps_new::AbstractVector, _::AbstractVector) = ps_new
 
-function LogDensityProblems.logdensity(Tar::LogTargetDensity, θ)
-    if Tar.estim_collocate
-        return physloglikelihood(Tar, θ) + priorweights(Tar, θ) + L2LossData(Tar, θ) +
-               L2loss2(Tar, θ)
-    else
-        return physloglikelihood(Tar, θ) + priorweights(Tar, θ) + L2LossData(Tar, θ)
-    end
+function LogDensityProblems.logdensity(ltd::LogTargetDensity, θ)
+    ldensity = physloglikelihood(ltd, θ) + priorweights(ltd, θ) + L2LossData(ltd, θ)
+    ltd.estim_collocate && return ldensity + L2loss2(ltd, θ)
+    return ldensity
 end
 
-LogDensityProblems.dimension(Tar::LogTargetDensity) = Tar.dim
+LogDensityProblems.dimension(ltd::LogTargetDensity) = ltd.dim
 
 function LogDensityProblems.capabilities(::LogTargetDensity)
-    LogDensityProblems.LogDensityOrder{1}()
+    return LogDensityProblems.LogDensityOrder{1}()
 end
 
 """
 suggested extra loss function for ODE solver case
 """
-function L2loss2(Tar::LogTargetDensity, θ)
-    f = Tar.prob.f
+@views function L2loss2(ltd::LogTargetDensity, θ)
+    ltd.extraparams ≤ 0 && return false  # XXX: type-stability?
 
-    # parameter estimation chosen or not
-    if Tar.extraparams > 0
-        autodiff = Tar.autodiff
-        # Timepoints to enforce Physics 
-        t = Tar.dataset[end]
-        u1 = Tar.dataset[2]
-        û = Tar.dataset[1]
-
-        nnsol = NNodederi(Tar, t, θ[1:(length(θ) - Tar.extraparams)], autodiff)
-
-        ode_params = Tar.extraparams == 1 ?
-                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
-                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
-
-        if length(Tar.prob.u0) == 1
-            physsol = [f(û[i],
-                ode_params,
-                t[i])
-                       for i in 1:length(û[:, 1])]
-        else
-            physsol = [f([û[i], u1[i]],
-                ode_params,
-                t[i])
-                       for i in 1:length(û)]
-        end
-        #form of NN output matrix output dim x n 
-        deri_physsol = reduce(hcat, physsol)
-   
-        physlogprob = 0
-        for i in 1:length(Tar.prob.u0)
-            # can add phystdnew[i] for u[i] 
-            physlogprob += logpdf(MvNormal(deri_physsol[i, :],
-                    LinearAlgebra.Diagonal(map(abs2,
-                        (Tar.phynewstd[i]) .*
-                        ones(length(nnsol[i, :]))))),
-                nnsol[i, :])
-        end
-        return physlogprob
+    f = ltd.prob.f
+    t = ltd.dataset[end]
+    u1 = ltd.dataset[2]
+    û = ltd.dataset[1]
+
+    nnsol = ode_dfdx(ltd, t, θ[1:(length(θ) - ltd.extraparams)], ltd.autodiff)
+
+    ode_params = ltd.extraparams == 1 ? θ[((length(θ) - ltd.extraparams) + 1)] :
+                 θ[((length(θ) - ltd.extraparams) + 1):length(θ)]
+
+    physsol = if length(ltd.prob.u0) == 1
+        [f(û[i], ode_params, tᵢ) for (i, tᵢ) in enumerate(t)]
     else
-        return 0
+        [f([û[i], u1[i]], ode_params, tᵢ) for (i, tᵢ) in enumerate(t)]
+    end
+    # form of NN output matrix output dim x n
+    deri_physsol = reduce(hcat, physsol)
+    T = promote_type(eltype(deri_physsol), eltype(nnsol))
+
+    physlogprob = T(0)
+    for i in 1:length(ltd.prob.u0)
+        physlogprob += logpdf(
+            MvNormal(deri_physsol[i, :],
+                Diagonal(abs2.(T(ltd.phynewstd[i]) .* ones(T, length(nnsol[i, :]))))),
+            nnsol[i, :]
+        )
     end
+    return physlogprob
 end
 
 """
 L2 loss loglikelihood(needed for ODE parameter estimation).
 """
-function L2LossData(Tar::LogTargetDensity, θ)
-    # check if dataset is provided
-    if Tar.dataset isa Vector{Nothing} || Tar.extraparams == 0
-        return 0
-    else
-        # matrix(each row corresponds to vector u's rows)
-        nn = Tar(Tar.dataset[end], θ[1:(length(θ) - Tar.extraparams)])
-
-        L2logprob = 0
-        for i in 1:length(Tar.prob.u0)
-            # for u[i] ith vector must be added to dataset, nn[1,:] is the dx in lotka_volterra
-            L2logprob += logpdf(
-                MvNormal(nn[i, :],
-                    LinearAlgebra.Diagonal(abs2.(Tar.l2std[i] .*
-                                                 ones(length(Tar.dataset[i]))))),
-                Tar.dataset[i])
-        end
-        return L2logprob
+@views function L2LossData(ltd::LogTargetDensity, θ)
+    (ltd.dataset isa Vector{Nothing} || ltd.extraparams == 0) && return 0
+
+    # matrix(each row corresponds to vector u's rows)
+    nn = ltd(ltd.dataset[end], θ[1:(length(θ) - ltd.extraparams)])
+    T = eltype(nn)
+
+    L2logprob = zero(T)
+    for i in 1:length(ltd.prob.u0)
+        # for u[i] ith vector must be added to dataset,nn[1, :] is the dx in lotka_volterra
+        L2logprob += logpdf(
+            MvNormal(
+                nn[i, :],
+                Diagonal(abs2.(T(ltd.l2std[i]) .* ones(T, length(ltd.dataset[i]))))
+            ),
+            ltd.dataset[i]
+        )
     end
+    return L2logprob
 end
 
 """
 Physics loglikelihood over problem timespan + dataset timepoints.
 """
-function physloglikelihood(Tar::LogTargetDensity, θ)
-    f = Tar.prob.f
-    p = Tar.prob.p
-    tspan = Tar.prob.tspan
-    autodiff = Tar.autodiff
-    strategy = Tar.strategy
+function physloglikelihood(ltd::LogTargetDensity, θ)
+    (; f, p, tspan) = ltd.prob
+    (; autodiff, strategy) = ltd
 
     # parameter estimation chosen or not
-    if Tar.extraparams > 0
-        ode_params = Tar.extraparams == 1 ?
-                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
-                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
+    if ltd.extraparams > 0
+        ode_params = ltd.extraparams == 1 ? θ[((length(θ) - ltd.extraparams) + 1)] :
+                     θ[((length(θ) - ltd.extraparams) + 1):length(θ)]
     else
-        ode_params = p == SciMLBase.NullParameters() ? [] : p
+        ode_params = p isa SciMLBase.NullParameters ? Float64[] : p
     end
 
-    return getlogpdf(strategy, Tar, f, autodiff, tspan, ode_params, θ)
+    return getlogpdf(strategy, ltd, f, autodiff, tspan, ode_params, θ)
 end
 
-function getlogpdf(strategy::GridTraining, Tar::LogTargetDensity, f, autodiff::Bool,
-        tspan,
-        ode_params, θ)
-    if Tar.dataset isa Vector{Nothing}
-        t = collect(eltype(strategy.dx), tspan[1]:(strategy.dx):tspan[2])
-    else
-        t = vcat(collect(eltype(strategy.dx), tspan[1]:(strategy.dx):tspan[2]),
-            Tar.dataset[end])
-    end
-
-    sum(innerdiff(Tar, f, autodiff, t, θ,
-        ode_params))
+function getlogpdf(strategy::GridTraining, ltd::LogTargetDensity, f, autodiff::Bool,
+        tspan, ode_params, θ)
+    ts = collect(eltype(strategy.dx), tspan[1]:(strategy.dx):tspan[2])
+    t = ltd.dataset isa Vector{Nothing} ? ts : vcat(ts, ltd.dataset[end])
+    return sum(innerdiff(ltd, f, autodiff, t, θ, ode_params))
 end
 
-function getlogpdf(strategy::StochasticTraining,
-        Tar::LogTargetDensity,
-        f,
-        autodiff::Bool,
-        tspan,
-        ode_params,
-        θ)
-    if Tar.dataset isa Vector{Nothing}
-        t = [(tspan[2] - tspan[1]) * rand() + tspan[1] for i in 1:(strategy.points)]
-    else
-        t = vcat([(tspan[2] - tspan[1]) * rand() + tspan[1] for i in 1:(strategy.points)],
-            Tar.dataset[end])
-    end
-
-    sum(innerdiff(Tar, f, autodiff, t, θ,
-        ode_params))
+function getlogpdf(strategy::StochasticTraining, ltd::LogTargetDensity,
+        f, autodiff::Bool, tspan, ode_params, θ)
+    T = promote_type(eltype(tspan[1]), eltype(tspan[2]))
+    samples = (tspan[2] - tspan[1]) .* rand(T, strategy.points) .+ tspan[1]
+    t = ltd.dataset isa Vector{Nothing} ? samples : vcat(samples, ltd.dataset[end])
+    return sum(innerdiff(ltd, f, autodiff, t, θ, ode_params))
 end
 
-function getlogpdf(strategy::QuadratureTraining, Tar::LogTargetDensity, f,
-        autodiff::Bool,
-        tspan,
-        ode_params, θ)
-    function integrand(t::Number, θ)
-        innerdiff(Tar, f, autodiff, [t], θ, ode_params)
-    end
+function getlogpdf(strategy::QuadratureTraining, ltd::LogTargetDensity, f, autodiff::Bool,
+        tspan, ode_params, θ)
+    integrand(t::Number, θ) = innerdiff(ltd, f, autodiff, [t], θ, ode_params)
     intprob = IntegralProblem(
-        integrand, (tspan[1], tspan[2]), θ; nout = length(Tar.prob.u0))
-    sol = solve(intprob, QuadGKJL(); abstol = strategy.abstol, reltol = strategy.reltol)
-    sum(sol.u)
+        integrand, (tspan[1], tspan[2]), θ; nout = length(ltd.prob.u0))
+    sol = solve(intprob, QuadGKJL(); strategy.abstol, strategy.reltol)
+    return sum(sol.u)
 end
 
-function getlogpdf(strategy::WeightedIntervalTraining, Tar::LogTargetDensity, f,
-        autodiff::Bool,
-        tspan,
-        ode_params, θ)
-    minT = tspan[1]
-    maxT = tspan[2]
-
+function getlogpdf(strategy::WeightedIntervalTraining, ltd::LogTargetDensity, f,
+        autodiff::Bool, tspan, ode_params, θ)
+    minT, maxT = tspan
     weights = strategy.weights ./ sum(strategy.weights)
-
     N = length(weights)
-    points = strategy.points
-
     difference = (maxT - minT) / N
 
-    data = Float64[]
+    ts = eltype(difference)[]
     for (index, item) in enumerate(weights)
-        temp_data = rand(1, trunc(Int, points * item)) .* difference .+ minT .+
+        temp_data = rand(1, trunc(Int, strategy.points * item)) .* difference .+ minT .+
                     ((index - 1) * difference)
-        data = append!(data, temp_data)
+        append!(ts, temp_data)
     end
 
-    if Tar.dataset isa Vector{Nothing}
-        t = data
-    else
-        t = vcat(data,
-            Tar.dataset[end])
-    end
-
-    sum(innerdiff(Tar, f, autodiff, t, θ,
-        ode_params))
+    t = ltd.dataset isa Vector{Nothing} ? ts : vcat(ts, ltd.dataset[end])
+    return sum(innerdiff(ltd, f, autodiff, t, θ, ode_params))
 end
 
 """
 MvNormal likelihood at each `ti` in time `t` for ODE collocation residue with NN with parameters θ.
 """
-function innerdiff(Tar::LogTargetDensity, f, autodiff::Bool, t::AbstractVector, θ,
+@views function innerdiff(ltd::LogTargetDensity, f, autodiff::Bool, t::AbstractVector, θ,
         ode_params)
+    # ltd used for phi and LogTargetDensity object attributes access
+    out = ltd(t, θ[1:(length(θ) - ltd.extraparams)])
 
-    # Tar used for phi and LogTargetDensity object attributes access
-    out = Tar(t, θ[1:(length(θ) - Tar.extraparams)])
-
-    # # reject samples case(write clear reason why)
-    if any(isinf, out[:, 1]) || any(isinf, ode_params)
-        return -Inf
-    end
+    # reject samples case(write clear reason why)
+    (any(isinf, out[:, 1]) || any(isinf, ode_params)) && return convert(eltype(out), -Inf)
 
     # this is a vector{vector{dx,dy}}(handle case single u(float passed))
     if length(out[:, 1]) == 1
-        physsol = [f(out[:, i][1],
-                       ode_params,
-                       t[i])
-                   for i in 1:length(out[1, :])]
+        physsol = [f(out[:, i][1], ode_params, t[i]) for i in 1:length(out[1, :])]
     else
-        physsol = [f(out[:, i],
-                       ode_params,
-                       t[i])
-                   for i in 1:length(out[1, :])]
+        physsol = [f(out[:, i], ode_params, t[i]) for i in 1:length(out[1, :])]
     end
     physsol = reduce(hcat, physsol)
 
-    nnsol = NNodederi(Tar, t, θ[1:(length(θ) - Tar.extraparams)], autodiff)
+    nnsol = ode_dfdx(ltd, t, θ[1:(length(θ) - ltd.extraparams)], autodiff)
 
     vals = nnsol .- physsol
+    T = eltype(vals)
 
-    # N dimensional vector if N outputs for NN(each row has logpdf of u[i] where u is vector of dependant variables)
+    # N dimensional vector if N outputs for NN(each row has logpdf of u[i] where u is vector
+    # of dependant variables)
     return [logpdf(
                 MvNormal(vals[i, :],
-                    LinearAlgebra.Diagonal(abs2.(Tar.phystd[i] .*
-                                                 ones(length(vals[i, :]))))),
-                zeros(length(vals[i, :]))) for i in 1:length(Tar.prob.u0)]
+                    Diagonal(abs2.(T(ltd.phystd[i]) .* ones(T, length(vals[i, :]))))),
+                zeros(T, length(vals[i, :]))
+            ) for i in 1:length(ltd.prob.u0)]
 end
 
 """
 Prior logpdf for NN parameters + ODE constants.
 """
-function priorweights(Tar::LogTargetDensity, θ)
-    allparams = Tar.priors
-    # nn weights
-    nnwparams = allparams[1]
-
-    if Tar.extraparams > 0
-        # Vector of ode parameters priors
-        invpriors = allparams[2:end]
-
-        invlogpdf = sum(
-            logpdf(invpriors[length(θ) - i + 1], θ[i])
-            for i in (length(θ) - Tar.extraparams + 1):length(θ);
-            init = 0.0)
-
-        return (invlogpdf
-                +
-                logpdf(nnwparams, θ[1:(length(θ) - Tar.extraparams)]))
-    else
-        return logpdf(nnwparams, θ)
-    end
-end
+@views function priorweights(ltd::LogTargetDensity, θ)
+    allparams = ltd.priors
+    nnwparams = allparams[1] # nn weights
 
-function generate_Tar(chain::Lux.AbstractExplicitLayer, init_params)
-    θ, st = Lux.setup(Random.default_rng(), chain)
-    return init_params, chain, st
-end
+    ltd.extraparams ≤ 0 && return logpdf(nnwparams, θ)
 
-function generate_Tar(chain::Lux.AbstractExplicitLayer, init_params::Nothing)
-    θ, st = Lux.setup(Random.default_rng(), chain)
-    return θ, chain, st
-end
+    # Vector of ode parameters priors
+    invpriors = allparams[2:end]
 
-"""
-NN OUTPUT AT t,θ ~ phi(t,θ).
-"""
-function (f::LogTargetDensity{C, S})(t::AbstractVector,
-        θ) where {C <: Lux.AbstractExplicitLayer, S}
-    θ = vector_to_parameters(θ, f.init_params)
-    y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), t'), θ, f.st)
-    ChainRulesCore.@ignore_derivatives f.st = st
-    f.prob.u0 .+ (t' .- f.prob.tspan[1]) .* y
+    invlogpdf = sum(
+        logpdf(invpriors[length(θ) - i + 1], θ[i])
+    for i in (length(θ) - ltd.extraparams + 1):length(θ))
+
+    return invlogpdf + logpdf(nnwparams, θ[1:(length(θ) - ltd.extraparams)])
 end
 
-function (f::LogTargetDensity{C, S})(t::Number,
-        θ) where {C <: Lux.AbstractExplicitLayer, S}
-    θ = vector_to_parameters(θ, f.init_params)
-    y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), [t]), θ, f.st)
-    ChainRulesCore.@ignore_derivatives f.st = st
-    f.prob.u0 .+ (t .- f.prob.tspan[1]) .* y
+function generate_ltd(chain::AbstractLuxLayer, init_params)
+    return init_params, chain, LuxCore.initialstates(Random.default_rng(), chain)
 end
 
-"""
-Similar to ode_dfdx() in NNODE.
-"""
-function NNodederi(phi::LogTargetDensity, t::AbstractVector, θ, autodiff::Bool)
-    if autodiff
-        hcat(ForwardDiff.derivative.(ti -> phi(ti, θ), t)...)
-    else
-        (phi(t .+ sqrt(eps(eltype(t))), θ) - phi(t, θ)) ./ sqrt(eps(eltype(t)))
-    end
+function generate_ltd(chain::AbstractLuxLayer, ::Nothing)
+    θ, st = LuxCore.setup(Random.default_rng(), chain)
+    return θ, chain, st
 end
 
 function kernelchoice(Kernel, MCMCkwargs)
     if Kernel == HMCDA
-        δ, λ = MCMCkwargs[:δ], MCMCkwargs[:λ]
-        Kernel(δ, λ)
+        Kernel(MCMCkwargs[:δ], MCMCkwargs[:λ])
     elseif Kernel == NUTS
         δ, max_depth, Δ_max = MCMCkwargs[:δ], MCMCkwargs[:max_depth], MCMCkwargs[:Δ_max]
-        Kernel(δ, max_depth = max_depth, Δ_max = Δ_max)
-    else
-        # HMC
-        n_leapfrog = MCMCkwargs[:n_leapfrog]
-        Kernel(n_leapfrog)
+        Kernel(δ; max_depth, Δ_max)
+    else # HMC
+        Kernel(MCMCkwargs[:n_leapfrog])
     end
 end
 
 """
-    ahmc_bayesian_pinn_ode(prob, chain; strategy = GridTraining,
-                        dataset = [nothing],init_params = nothing,
-                        draw_samples = 1000, physdt = 1 / 20.0f0,l2std = [0.05],
-                        phystd = [0.05], phynewstd = [0.05], priorsNNw = (0.0, 2.0),
-                        param = [], nchains = 1, autodiff = false, Kernel = HMC,
-                        Adaptorkwargs = (Adaptor = StanHMCAdaptor,
-                                         Metric = DiagEuclideanMetric,
-                                         targetacceptancerate = 0.8),
-                        Integratorkwargs = (Integrator = Leapfrog,),
-                        MCMCkwargs = (n_leapfrog = 30,),
-                        progress = false, verbose = false)
+    ahmc_bayesian_pinn_ode(prob, chain; strategy = GridTraining, dataset = [nothing],
+                           init_params = nothing, draw_samples = 1000, physdt = 1 / 20.0f0,
+                           l2std = [0.05], phystd = [0.05], phynewstd = [0.05], priorsNNw = (0.0, 2.0),
+                           param = [], nchains = 1, autodiff = false, Kernel = HMC,
+                           Adaptorkwargs = (Adaptor = StanHMCAdaptor,
+                               Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
+                           Integratorkwargs = (Integrator = Leapfrog,),
+                           MCMCkwargs = (n_leapfrog = 30,), progress = false,
+                           verbose = false)
 
 !!! warn
 
-    Note that `ahmc_bayesian_pinn_ode()` only supports ODEs which are written in the out-of-place form, i.e.
-    `du = f(u,p,t)`, and not `f(du,u,p,t)`. If not declared out-of-place, then the `ahmc_bayesian_pinn_ode()`
-    will exit with an error.
+    Note that `ahmc_bayesian_pinn_ode()` only supports ODEs which are written in the
+    out-of-place form, i.e. `du = f(u,p,t)`, and not `f(du,u,p,t)`. If not declared
+    out-of-place, then `ahmc_bayesian_pinn_ode()` will exit with an error.
 
 ## Example
 
@@ -463,22 +328,29 @@ Incase you are only solving the Equations for solution, do not provide dataset
 
 ## Keyword Arguments
 
-* `strategy`: The training strategy used to choose the points for the evaluations. By default GridTraining is used with given physdt discretization.
-* `init_params`: initial parameter values for BPINN (ideally for multiple chains different initializations preferred)
+* `strategy`: The training strategy used to choose the points for the evaluations. By
+  default GridTraining is used with given physdt discretization.
+* `init_params`: initial parameter values for BPINN (ideally for multiple chains different
+  initializations preferred)
 * `nchains`: number of chains you want to sample
-* `draw_samples`: number of samples to be drawn in the MCMC algorithms (warmup samples are ~2/3 of draw samples)
+* `draw_samples`: number of samples to be drawn in the MCMC algorithms (warmup samples are
+  ~2/3 of draw samples)
 * `l2std`: standard deviation of BPINN prediction against L2 losses/Dataset
 * `phystd`: standard deviation of BPINN prediction against Chosen Underlying ODE System
 * `phynewstd`: standard deviation of new loss func term
-* `priorsNNw`: Tuple of (mean, std) for BPINN Network parameters. Weights and Biases of BPINN are Normal Distributions by default.
+* `priorsNNw`: Tuple of (mean, std) for BPINN Network parameters. Weights and Biases of
+  BPINN are Normal Distributions by default.
 * `param`: Vector of chosen ODE parameters Distributions in case of Inverse problems.
 * `autodiff`: Boolean Value for choice of Derivative Backend(default is numerical)
 * `physdt`: Timestep for approximating ODE in it's Time domain. (1/20.0 by default)
 * `Kernel`: Choice of MCMC Sampling Algorithm (AdvancedHMC.jl implementations HMC/NUTS/HMCDA)
-* `Integratorkwargs`: `Integrator`, `jitter_rate`, `tempering_rate`. Refer: https://turinglang.org/AdvancedHMC.jl/stable/
-* `Adaptorkwargs`: `Adaptor`, `Metric`, `targetacceptancerate`. Refer: https://turinglang.org/AdvancedHMC.jl/stable/
-    Note: Target percentage(in decimal) of iterations in which the proposals are accepted (0.8 by default)
-* `MCMCargs`: A NamedTuple containing all the chosen MCMC kernel's(HMC/NUTS/HMCDA) Arguments, as follows :
+* `Integratorkwargs`: `Integrator`, `jitter_rate`, `tempering_rate`.
+  Refer: https://turinglang.org/AdvancedHMC.jl/stable/
+* `Adaptorkwargs`: `Adaptor`, `Metric`, `targetacceptancerate`.
+  Refer: https://turinglang.org/AdvancedHMC.jl/stable/ Note: Target percentage (in decimal)
+  of iterations in which the proposals are accepted (0.8 by default)
+* `MCMCargs`: A NamedTuple containing all the chosen MCMC kernel's (HMC/NUTS/HMCDA)
+  Arguments, as follows :
     * `n_leapfrog`: number of leapfrog steps for HMC
     * `δ`: target acceptance probability for NUTS and HMCDA
     * `λ`: target trajectory length for HMCDA
@@ -488,67 +360,53 @@ Incase you are only solving the Equations for solution, do not provide dataset
 * `progress`: controls whether to show the progress meter or not.
 * `verbose`: controls the verbosity. (Sample call args in AHMC)
 
-## Warnings
+!!! warning
 
-* AdvancedHMC.jl is still developing convenience structs so might need changes on new releases.
+    AdvancedHMC.jl is still developing convenience structs so might need changes on new
+    releases.
 """
-function ahmc_bayesian_pinn_ode(prob::SciMLBase.ODEProblem, chain;
-        strategy = GridTraining, dataset = [nothing],
-        init_params = nothing, draw_samples = 1000,
-        physdt = 1 / 20.0, l2std = [0.05],
-        phystd = [0.05], phynewstd = [0.05], priorsNNw = (0.0, 2.0),
-        param = [], nchains = 1, autodiff = false,
-        Kernel = HMC,
+function ahmc_bayesian_pinn_ode(
+        prob::SciMLBase.ODEProblem, chain; strategy = GridTraining, dataset = [nothing],
+        init_params = nothing, draw_samples = 1000, physdt = 1 / 20.0, l2std = [0.05],
+        phystd = [0.05], phynewstd = [0.05], priorsNNw = (0.0, 2.0), param = [], nchains = 1,
+        autodiff = false, Kernel = HMC,
         Adaptorkwargs = (Adaptor = StanHMCAdaptor,
             Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
-        Integratorkwargs = (Integrator = Leapfrog,),
-        MCMCkwargs = (n_leapfrog = 30,),
-        progress = false, verbose = false,
-        estim_collocate = false)
-    !(chain isa Lux.AbstractExplicitLayer) &&
-        (chain = adapt(FromFluxAdaptor(false, false), chain))
-    # NN parameter prior mean and variance(PriorsNN must be a tuple)
-    if isinplace(prob)
-        throw(error("The BPINN ODE solver only supports out-of-place ODE definitions, i.e. du=f(u,p,t)."))
-    end
+        Integratorkwargs = (Integrator = Leapfrog,), MCMCkwargs = (n_leapfrog = 30,),
+        progress = false, verbose = false, estim_collocate = false)
+    @assert !isinplace(prob) "The BPINN ODE solver only supports out-of-place ODE definitions, i.e. du=f(u,p,t)."
+
+    chain isa AbstractLuxLayer || (chain = FromFluxAdaptor()(chain))
 
     strategy = strategy == GridTraining ? strategy(physdt) : strategy
 
     if dataset != [nothing] &&
        (length(dataset) < 2 || !(dataset isa Vector{<:Vector{<:AbstractFloat}}))
-        throw(error("Invalid dataset. dataset would be timeseries (x̂,t) where type: Vector{Vector{AbstractFloat}"))
+        error("Invalid dataset. dataset would be timeseries (x̂,t) where type: Vector{Vector{AbstractFloat}")
     end
 
     if dataset != [nothing] && param == []
         println("Dataset is only needed for Parameter Estimation + Forward Problem, not in only Forward Problem case.")
     elseif dataset == [nothing] && param != []
-        throw(error("Dataset Required for Parameter Estimation."))
+        error("Dataset Required for Parameter Estimation.")
     end
 
-    if chain isa Lux.AbstractExplicitLayer
-        # Lux-Named Tuple
-        initial_nnθ, recon, st = generate_Tar(chain, init_params)
-    else
-        error("Only Lux.AbstractExplicitLayer Neural networks are supported")
-    end
+    initial_nnθ, chain, st = generate_ltd(chain, init_params)
 
-    if nchains > Threads.nthreads()
-        throw(error("number of chains is greater than available threads"))
-    elseif nchains < 1
-        throw(error("number of chains must be greater than 1"))
-    end
+    @assert nchains≤Threads.nthreads() "number of chains is greater than available threads"
+    @assert nchains≥1 "number of chains must be greater than 1"
 
     # eltype(physdt) cause needs Float64 for find_good_stepsize
     # Lux chain(using component array later as vector_to_parameter need namedtuple)
-    initial_θ = collect(eltype(physdt),
-        vcat(ComponentArrays.ComponentArray(initial_nnθ)))
+    T = eltype(physdt)
+    initial_θ = getdata(ComponentArray{T}(initial_nnθ))
 
     # adding ode parameter estimation
     nparameters = length(initial_θ)
     ninv = length(param)
     priors = [
-        MvNormal(priorsNNw[1] * ones(nparameters),
-        LinearAlgebra.Diagonal(abs2.(priorsNNw[2] .* ones(nparameters))))
+        MvNormal(T(priorsNNw[1]) * ones(T, nparameters),
+        Diagonal(abs2.(T(priorsNNw[2]) .* ones(T, nparameters))))
     ]
 
     # append Ode params to all paramvector
@@ -560,30 +418,25 @@ function ahmc_bayesian_pinn_ode(prob::SciMLBase.ODEProblem, chain;
     end
 
     t0 = prob.tspan[1]
+    smodel = StatefulLuxLayer{true}(chain, nothing, st)
     # dimensions would be total no of params,initial_nnθ for Lux namedTuples
-    ℓπ = LogTargetDensity(nparameters, prob, recon, st, strategy, dataset, priors,
+    ℓπ = LogTargetDensity(nparameters, prob, smodel, strategy, dataset, priors,
         phystd, phynewstd, l2std, autodiff, physdt, ninv, initial_nnθ, estim_collocate)
 
-    try
-        ℓπ(t0, initial_θ[1:(nparameters - ninv)])
-    catch err
-        if isa(err, DimensionMismatch)
-            throw(DimensionMismatch("Dimensions of the initial u0 and chain should match"))
-        else
-            throw(err)
+    if verbose
+        @printf("Current Physics Log-likelihood: %g\n", physloglikelihood(ℓπ, initial_θ))
+        @printf("Current Prior Log-likelihood: %g\n", priorweights(ℓπ, initial_θ))
+        @printf("Current MSE against dataset Log-likelihood: %g\n",
+            L2LossData(ℓπ, initial_θ))
+        if estim_collocate
+            @printf("Current gradient loss against dataset Log-likelihood: %g\n",
+                L2loss2(ℓπ, initial_θ))
         end
     end
 
-    @info("Current Physics Log-likelihood : ", physloglikelihood(ℓπ, initial_θ))
-    @info("Current Prior Log-likelihood : ", priorweights(ℓπ, initial_θ))
-    @info("Current MSE against dataset Log-likelihood : ", L2LossData(ℓπ, initial_θ))
-    if estim_collocate
-        @info("Current gradient loss against dataset Log-likelihood : ",
-            L2loss2(ℓπ, initial_θ))
-    end
-
-    Adaptor, Metric, targetacceptancerate = Adaptorkwargs[:Adaptor],
-    Adaptorkwargs[:Metric], Adaptorkwargs[:targetacceptancerate]
+    Adaptor = Adaptorkwargs[:Adaptor]
+    Metric = Adaptorkwargs[:Metric]
+    targetacceptancerate = Adaptorkwargs[:targetacceptancerate]
 
     # Define Hamiltonian system (nparameters ~ dimensionality of the sampling space)
     metric = Metric(nparameters)
@@ -598,8 +451,10 @@ function ahmc_bayesian_pinn_ode(prob::SciMLBase.ODEProblem, chain;
 
         Threads.@threads for i in 1:nchains
             # each chain has different initial NNparameter values(better posterior exploration)
-            initial_θ = vcat(randn(nparameters - ninv),
-                initial_θ[(nparameters - ninv + 1):end])
+            initial_θ = vcat(
+                randn(eltype(initial_θ), nparameters - ninv),
+                initial_θ[(nparameters - ninv + 1):end]
+            )
             initial_ϵ = find_good_stepsize(hamiltonian, initial_θ)
             integrator = integratorchoice(Integratorkwargs, initial_ϵ)
             adaptor = adaptorchoice(Adaptor, MassMatrixAdaptor(metric),
@@ -612,7 +467,7 @@ function ahmc_bayesian_pinn_ode(prob::SciMLBase.ODEProblem, chain;
 
             samplesc[i] = samples
             statsc[i] = stats
-            mcmc_chain = Chains(hcat(samples...)')
+            mcmc_chain = Chains(reduce(hcat, samples)')
             chains[i] = mcmc_chain
         end
 
@@ -628,13 +483,17 @@ function ahmc_bayesian_pinn_ode(prob::SciMLBase.ODEProblem, chain;
         samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples,
             adaptor; progress = progress, verbose = verbose)
 
-        @info("Sampling Complete.")
-        @info("Final Physics Log-likelihood : ", physloglikelihood(ℓπ, samples[end]))
-        @info("Final Prior Log-likelihood : ", priorweights(ℓπ, samples[end]))
-        @info("Final MSE against dataset Log-likelihood : ", L2LossData(ℓπ, samples[end]))
-        if estim_collocate
-            @info("Final gradient loss against dataset Log-likelihood : ",
-                L2loss2(ℓπ, samples[end]))
+        if verbose
+            println("Sampling Complete.")
+            @printf("Final Physics Log-likelihood: %g\n",
+                physloglikelihood(ℓπ, samples[end]))
+            @printf("Final Prior Log-likelihood: %g\n", priorweights(ℓπ, samples[end]))
+            @printf("Final MSE against dataset Log-likelihood: %g\n",
+                L2LossData(ℓπ, samples[end]))
+            if estim_collocate
+                @printf("Final gradient loss against dataset Log-likelihood: %g\n",
+                    L2loss2(ℓπ, samples[end]))
+            end
         end
 
         # return a chain(basic chain),samples and stats
diff --git a/src/discretize.jl b/src/discretize.jl
index 7eb6c97af0..5187a0638a 100644
--- a/src/discretize.jl
+++ b/src/discretize.jl
@@ -23,23 +23,14 @@ to
           end
       end)
 
-for Lux.AbstractExplicitLayer.
+for Lux.AbstractLuxLayer.
 """
 function build_symbolic_loss_function(pinnrep::PINNRepresentation, eqs;
-        eq_params = SciMLBase.NullParameters(),
-        param_estim = false,
-        default_p = nothing,
-        bc_indvars = pinnrep.indvars,
-        integrand = nothing,
-        dict_transformation_vars = nothing,
-        transformation_vars = nothing,
+        eq_params = SciMLBase.NullParameters(), param_estim = false, default_p = nothing,
+        bc_indvars = pinnrep.indvars, integrand = nothing,
+        dict_transformation_vars = nothing, transformation_vars = nothing,
         integrating_depvars = pinnrep.depvars)
-    @unpack indvars, depvars, dict_indvars, dict_depvars, dict_depvar_input,
-    phi, derivative, integral,
-    multioutput, init_params, strategy, eq_params,
-    param_estim, default_p = pinnrep
-
-    eltypeθ = eltype(pinnrep.flat_init_params)
+    (; depvars, dict_depvars, dict_depvar_input, phi, derivative, integral, multioutput, init_params, strategy, eq_params, param_estim, default_p) = pinnrep
 
     if integrand isa Nothing
         loss_function = parse_equation(pinnrep, eqs)
@@ -68,9 +59,6 @@ function build_symbolic_loss_function(pinnrep::PINNRepresentation, eqs;
         expr_θ = Expr[]
         expr_phi = Expr[]
 
-        acum = [0; accumulate(+, map(length, init_params))]
-        sep = [(acum[i] + 1):acum[i + 1] for i in 1:(length(acum) - 1)]
-
         for i in eachindex(depvars)
             push!(expr_θ, :($θ.depvar.$(depvars[i])))
             push!(expr_phi, :(phi[$i]))
@@ -138,34 +126,28 @@ function build_symbolic_loss_function(pinnrep::PINNRepresentation, eqs;
     end
     let_ex = Expr(:let, vars_eq, vcat_expr_loss_functions)
     push!(ex.args, let_ex)
-    expr_loss_function = :(($vars) -> begin
+    return :(($vars) -> begin
         $ex
     end)
 end
 
 """
-    build_loss_function(eqs, indvars, depvars, phi, derivative, init_params; bc_indvars=nothing)
+    build_loss_function(eqs, indvars, depvars, phi, derivative, init_params;
+        bc_indvars=nothing)
 
 Returns the body of loss function, which is the executable Julia function, for the main
 equation or boundary condition.
 """
 function build_loss_function(pinnrep::PINNRepresentation, eqs, bc_indvars)
-    @unpack eq_params, param_estim, default_p, phi, derivative, integral = pinnrep
+    (; eq_params, param_estim, default_p, phi, derivative, integral) = pinnrep
 
     bc_indvars = bc_indvars === nothing ? pinnrep.indvars : bc_indvars
 
-    expr_loss_function = build_symbolic_loss_function(pinnrep, eqs;
-        bc_indvars = bc_indvars,
-        eq_params = eq_params,
-        param_estim = param_estim,
-        default_p = default_p)
+    expr_loss_function = build_symbolic_loss_function(pinnrep, eqs; bc_indvars, eq_params,
+        param_estim, default_p)
     u = get_u()
     _loss_function = @RuntimeGeneratedFunction(expr_loss_function)
-    loss_function = (cord, θ) -> begin
-        _loss_function(cord, θ, phi, derivative, integral, u,
-            default_p)
-    end
-    return loss_function
+    return (cord, θ) -> _loss_function(cord, θ, phi, derivative, integral, u, default_p)
 end
 
 """
@@ -178,8 +160,7 @@ function generate_training_sets end
 
 function generate_training_sets(domains, dx, eqs, bcs, eltypeθ, _indvars::Array,
         _depvars::Array)
-    depvars, indvars, dict_indvars, dict_depvars, dict_depvar_input = get_vars(_indvars,
-        _depvars)
+    _, _, dict_indvars, dict_depvars, _ = get_vars(_indvars, _depvars)
     return generate_training_sets(domains, dx, eqs, bcs, eltypeθ, dict_indvars,
         dict_depvars)
 end
@@ -187,11 +168,7 @@ end
 # Generate training set in the domain and on the boundary
 function generate_training_sets(domains, dx, eqs, bcs, eltypeθ, dict_indvars::Dict,
         dict_depvars::Dict)
-    if dx isa Array
-        dxs = dx
-    else
-        dxs = fill(dx, length(domains))
-    end
+    dxs = dx isa Array ? dx : fill(dx, length(domains))
 
     spans = [infimum(d.domain):dx:supremum(d.domain) for (d, dx) in zip(domains, dxs)]
     dict_var_span = Dict([Symbol(d.variables) => infimum(d.domain):dx:supremum(d.domain)
@@ -201,12 +178,8 @@ function generate_training_sets(domains, dx, eqs, bcs, eltypeθ, dict_indvars::D
     bound_vars = get_variables(bcs, dict_indvars, dict_depvars)
 
     dif = [eltypeθ[] for i in 1:size(domains)[1]]
-    for _args in bound_vars
-        for (i, x) in enumerate(_args)
-            if x isa Number
-                push!(dif[i], x)
-            end
-        end
+    for _args in bound_vars, (i, x) in enumerate(_args)
+        x isa Number && push!(dif[i], x)
     end
     cord_train_set = collect.(spans)
     bc_data = map(zip(dif, cord_train_set)) do (d, c)
@@ -216,24 +189,20 @@ function generate_training_sets(domains, dx, eqs, bcs, eltypeθ, dict_indvars::D
     dict_var_span_ = Dict([Symbol(d.variables) => bc for (d, bc) in zip(domains, bc_data)])
 
     bcs_train_sets = map(bound_args) do bt
-        span = map(b -> get(dict_var_span, b, b), bt)
-        _set = adapt(eltypeθ,
-            hcat(vec(map(points -> collect(points), Iterators.product(span...)))...))
+        span = get.((dict_var_span,), bt, bt)
+        return reduce(hcat, vec(map(collect, Iterators.product(span...)))) |>
+               EltypeAdaptor{eltypeθ}()
     end
 
-    pde_vars = get_variables(eqs, dict_indvars, dict_depvars)
     pde_args = get_argument(eqs, dict_indvars, dict_depvars)
 
-    pde_train_set = adapt(eltypeθ,
-        hcat(vec(map(points -> collect(points),
-            Iterators.product(bc_data...)))...))
-
     pde_train_sets = map(pde_args) do bt
-        span = map(b -> get(dict_var_span_, b, b), bt)
-        _set = adapt(eltypeθ,
-            hcat(vec(map(points -> collect(points), Iterators.product(span...)))...))
+        span = get.((dict_var_span_,), bt, bt)
+        return reduce(hcat, vec(map(collect, Iterators.product(span...)))) |>
+               EltypeAdaptor{eltypeθ}()
     end
-    [pde_train_sets, bcs_train_sets]
+
+    return [pde_train_sets, bcs_train_sets]
 end
 
 """
@@ -245,32 +214,33 @@ training strategy: StochasticTraining, QuasiRandomTraining, QuadratureTraining.
 function get_bounds end
 
 function get_bounds(domains, eqs, bcs, eltypeθ, _indvars::Array, _depvars::Array, strategy)
-    depvars, indvars, dict_indvars, dict_depvars, dict_depvar_input = get_vars(_indvars,
-        _depvars)
+    _, _, dict_indvars, dict_depvars, _ = get_vars(_indvars, _depvars)
     return get_bounds(domains, eqs, bcs, eltypeθ, dict_indvars, dict_depvars, strategy)
 end
 
 function get_bounds(domains, eqs, bcs, eltypeθ, _indvars::Array, _depvars::Array,
         strategy::QuadratureTraining)
-    depvars, indvars, dict_indvars, dict_depvars, dict_depvar_input = get_vars(_indvars,
-        _depvars)
+    _, _, dict_indvars, dict_depvars, _ = get_vars(_indvars, _depvars)
     return get_bounds(domains, eqs, bcs, eltypeθ, dict_indvars, dict_depvars, strategy)
 end
 
 function get_bounds(domains, eqs, bcs, eltypeθ, dict_indvars, dict_depvars,
-        strategy::QuadratureTraining)
+        ::QuadratureTraining)
     dict_lower_bound = Dict([Symbol(d.variables) => infimum(d.domain) for d in domains])
     dict_upper_bound = Dict([Symbol(d.variables) => supremum(d.domain) for d in domains])
 
     pde_args = get_argument(eqs, dict_indvars, dict_depvars)
 
+    ϵ = cbrt(eps(eltypeθ))
+    eltype_adaptor = EltypeAdaptor{eltypeθ}()
+
     pde_lower_bounds = map(pde_args) do pd
-        span = map(p -> get(dict_lower_bound, p, p), pd)
-        map(s -> adapt(eltypeθ, s) + cbrt(eps(eltypeθ)), span)
+        span = get.((dict_lower_bound,), pd, pd) |> eltype_adaptor
+        return span .+ ϵ
     end
     pde_upper_bounds = map(pde_args) do pd
-        span = map(p -> get(dict_upper_bound, p, p), pd)
-        map(s -> adapt(eltypeθ, s) - cbrt(eps(eltypeθ)), span)
+        span = get.((dict_upper_bound,), pd, pd) |> eltype_adaptor
+        return span .+ ϵ
     end
     pde_bounds = [pde_lower_bounds, pde_upper_bounds]
 
@@ -284,42 +254,39 @@ function get_bounds(domains, eqs, bcs, eltypeθ, dict_indvars, dict_depvars,
     end
     bcs_bounds = [bcs_lower_bounds, bcs_upper_bounds]
 
-    [pde_bounds, bcs_bounds]
+    return [pde_bounds, bcs_bounds]
 end
 
 function get_bounds(domains, eqs, bcs, eltypeθ, dict_indvars, dict_depvars, strategy)
     dx = 1 / strategy.points
     dict_span = Dict([Symbol(d.variables) => [
-                          infimum(d.domain) + dx,
-                          supremum(d.domain) - dx
-                      ] for d in domains])
+                          infimum(d.domain) + dx, supremum(d.domain) - dx] for d in domains])
 
-    # pde_bounds = [[infimum(d.domain),supremum(d.domain)] for d in domains]
     pde_args = get_argument(eqs, dict_indvars, dict_depvars)
     pde_bounds = map(pde_args) do pde_arg
         bds = mapreduce(s -> get(dict_span, s, fill(s, 2)), hcat, pde_arg)
         bds = eltypeθ.(bds)
-        bds[1, :], bds[2, :]
+        return bds[1, :], bds[2, :]
     end
 
     bound_args = get_argument(bcs, dict_indvars, dict_depvars)
     bcs_bounds = map(bound_args) do bound_arg
         bds = mapreduce(s -> get(dict_span, s, fill(s, 2)), hcat, bound_arg)
         bds = eltypeθ.(bds)
-        bds[1, :], bds[2, :]
+        return bds[1, :], bds[2, :]
     end
+
     return pde_bounds, bcs_bounds
 end
 
 function get_numeric_integral(pinnrep::PINNRepresentation)
-    @unpack strategy, indvars, depvars, multioutput, derivative,
-    depvars, indvars, dict_indvars, dict_depvars = pinnrep
+    (; strategy, indvars, depvars, derivative, depvars, indvars, dict_indvars, dict_depvars) = pinnrep
 
-    integral = (u, cord, phi, integrating_var_id, integrand_func, lb, ub, θ; strategy = strategy, indvars = indvars, depvars = depvars, dict_indvars = dict_indvars, dict_depvars = dict_depvars) -> begin
+    return (u, cord, phi, integrating_var_id, integrand_func, lb, ub, θ; strategy = strategy, indvars = indvars, depvars = depvars, dict_indvars = dict_indvars, dict_depvars = dict_depvars) -> begin
         function integration_(cord, lb, ub, θ)
             cord_ = cord
             function integrand_(x, p)
-                ChainRulesCore.@ignore_derivatives @views(cord_[integrating_var_id]) .= x
+                @ignore_derivatives cord_[integrating_var_id] .= x
                 return integrand_func(cord_, p, phi, derivative, nothing, u, nothing)
             end
             prob_ = IntegralProblem(integrand_, (lb, ub), θ)
@@ -332,24 +299,22 @@ function get_numeric_integral(pinnrep::PINNRepresentation)
         ub_ = zeros(size(ub)[1], size(cord)[2])
         for (i, l) in enumerate(lb)
             if l isa Number
-                ChainRulesCore.@ignore_derivatives lb_[i, :] = fill(l, 1, size(cord)[2])
+                @ignore_derivatives lb_[i, :] .= l
             else
-                ChainRulesCore.@ignore_derivatives lb_[i, :] = l(cord, θ, phi, derivative,
-                    nothing, u, nothing)
+                @ignore_derivatives lb_[i, :] = l(
+                    cord, θ, phi, derivative, nothing, u, nothing)
             end
         end
         for (i, u_) in enumerate(ub)
             if u_ isa Number
-                ChainRulesCore.@ignore_derivatives ub_[i, :] = fill(u_, 1, size(cord)[2])
+                @ignore_derivatives ub_[i, :] .= u_
             else
-                ChainRulesCore.@ignore_derivatives ub_[i, :] = u_(cord, θ, phi, derivative,
+                @ignore_derivatives ub_[i, :] = u_(cord, θ, phi, derivative,
                     nothing, u, nothing)
             end
         end
         integration_arr = Matrix{Float64}(undef, 1, 0)
-        for i in 1:size(cord)[2]
-            # ub__ = @Zygote.ignore getindex(ub_, :,  i)
-            # lb__ = @Zygote.ignore getindex(lb_, :,  i)
+        for i in 1:size(cord, 2)
             integration_arr = hcat(integration_arr,
                 integration_(cord[:, i], lb_[:, i], ub_[:, i], θ))
         end
@@ -364,33 +329,25 @@ end
 It transforms a symbolic description of a ModelingToolkit-defined `PDESystem` into a
 `PINNRepresentation` which holds the pieces required to build an `OptimizationProblem`
 for [Optimization.jl](https://docs.sciml.ai/Optimization/stable) or a Likelihood Function
-used for HMC based Posterior Sampling Algorithms [AdvancedHMC.jl](https://turinglang.org/AdvancedHMC.jl/stable/)
-which is later optimized upon to give Solution or the Solution Distribution of the PDE.
+used for HMC based Posterior Sampling Algorithms
+[AdvancedHMC.jl](https://turinglang.org/AdvancedHMC.jl/stable/) which is later optimized
+upon to give Solution or the Solution Distribution of the PDE.
 
 For more information, see `discretize` and `PINNRepresentation`.
 """
-function SciMLBase.symbolic_discretize(pde_system::PDESystem,
-        discretization::AbstractPINN)
-    eqs = pde_system.eqs
-    bcs = pde_system.bcs
-    chain = discretization.chain
-
-    domains = pde_system.domain
+function SciMLBase.symbolic_discretize(pde_system::PDESystem, discretization::AbstractPINN)
+    (; eqs, bcs, domain) = pde_system
     eq_params = pde_system.ps
     defaults = pde_system.defaults
-    default_p = eq_params == SciMLBase.NullParameters() ? nothing :
-                [defaults[ep] for ep in eq_params]
-
-    param_estim = discretization.param_estim
-    additional_loss = discretization.additional_loss
+    (; chain, param_estim, additional_loss, multioutput, init_params, phi, derivative, strategy, logger, iteration, self_increment) = discretization
+    (; log_frequency) = discretization.log_options
     adaloss = discretization.adaptive_loss
 
-    depvars, indvars, dict_indvars, dict_depvars, dict_depvar_input = get_vars(
-        pde_system.indvars,
-        pde_system.depvars)
+    default_p = eq_params isa SciMLBase.NullParameters ? nothing :
+                [defaults[ep] for ep in eq_params]
 
-    multioutput = discretization.multioutput
-    init_params = discretization.init_params
+    depvars, indvars, dict_indvars, dict_depvars, dict_depvar_input = get_vars(
+        pde_system.indvars, pde_system.depvars)
 
     if init_params === nothing
         # Use the initialization of the neural network framework
@@ -398,70 +355,41 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem,
         # This is done because Float64 is almost always better for these applications
         if chain isa AbstractArray
             x = map(chain) do x
-                _x = ComponentArrays.ComponentArray(Lux.initialparameters(
-                    Random.default_rng(),
-                    x))
-                Float64.(_x) # No ComponentArray GPU support
+                ComponentArray{Float64}(LuxCore.initialparameters(Random.default_rng(), x))
             end
             names = ntuple(i -> depvars[i], length(chain))
-            init_params = ComponentArrays.ComponentArray(NamedTuple{names}(i
-            for i in x))
+            init_params = ComponentArray(NamedTuple{names}(Tuple(x)))
         else
-            init_params = Float64.(ComponentArrays.ComponentArray(Lux.initialparameters(
-                Random.default_rng(),
-                chain)))
+            init_params = ComponentArray{Float64}(LuxCore.initialparameters(
+                Random.default_rng(), chain))
         end
-    else
-        init_params = init_params
     end
 
-    flat_init_params = if init_params isa ComponentArrays.ComponentArray
+    flat_init_params = if init_params isa ComponentArray
         init_params
     elseif multioutput
         @assert length(init_params) == length(depvars)
         names = ntuple(i -> depvars[i], length(init_params))
-        x = ComponentArrays.ComponentArray(NamedTuple{names}(i for i in init_params))
+        x = ComponentArray(NamedTuple{names}(Tuple(init_params)))
     else
-        ComponentArrays.ComponentArray(init_params)
+        ComponentArray(init_params)
     end
 
-    flat_init_params = if param_estim == false && multioutput
-        ComponentArrays.ComponentArray(; depvar = flat_init_params)
-    elseif param_estim == false && !multioutput
-        flat_init_params
+    flat_init_params = if !param_estim
+        multioutput ? ComponentArray(; depvar = flat_init_params) : flat_init_params
     else
-        ComponentArrays.ComponentArray(; depvar = flat_init_params, p = default_p)
+        ComponentArray(; depvar = flat_init_params, p = default_p)
     end
 
-    eltypeθ = eltype(flat_init_params)
-
-    if adaloss === nothing
-        adaloss = NonAdaptiveLoss{eltypeθ}()
+    if length(flat_init_params) == 0 && !Base.isconcretetype(eltype(flat_init_params))
+        flat_init_params = ComponentArray(
+            convert(AbstractArray{Float64}, getdata(flat_init_params)),
+            getaxes(flat_init_params))
     end
 
-    phi = discretization.phi
+    adaloss === nothing && (adaloss = NonAdaptiveLoss{eltype(flat_init_params)}())
 
-    if (phi isa Vector && phi[1].f isa Lux.AbstractExplicitLayer)
-        for ϕ in phi
-            ϕ.st = adapt(parameterless_type(ComponentArrays.getdata(flat_init_params)),
-                ϕ.st)
-        end
-    elseif (!(phi isa Vector) && phi.f isa Lux.AbstractExplicitLayer)
-        phi.st = adapt(parameterless_type(ComponentArrays.getdata(flat_init_params)),
-            phi.st)
-    end
-
-    derivative = discretization.derivative
-    strategy = discretization.strategy
-
-    logger = discretization.logger
-    log_frequency = discretization.log_options.log_frequency
-    iteration = discretization.iteration
-    self_increment = discretization.self_increment
-
-    if !(eqs isa Array)
-        eqs = [eqs]
-    end
+    eqs isa Array || (eqs = [eqs])
 
     pde_indvars = if strategy isa QuadratureTraining
         get_argument(eqs, dict_indvars, dict_depvars)
@@ -478,7 +406,7 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem,
     pde_integration_vars = get_integration_variables(eqs, dict_indvars, dict_depvars)
     bc_integration_vars = get_integration_variables(bcs, dict_indvars, dict_depvars)
 
-    pinnrep = PINNRepresentation(eqs, bcs, domains, eq_params, defaults, default_p,
+    pinnrep = PINNRepresentation(eqs, bcs, domain, eq_params, defaults, default_p,
         param_estim, additional_loss, adaloss, depvars, indvars,
         dict_indvars, dict_depvars, dict_depvar_input, logger,
         multioutput, iteration, init_params, flat_init_params, phi,
@@ -503,39 +431,32 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem,
     pinnrep.symbolic_bc_loss_functions = symbolic_bc_loss_functions
 
     datafree_pde_loss_functions = [build_loss_function(pinnrep, eq, pde_indvar)
-                                   for (eq, pde_indvar, integration_indvar) in zip(eqs,
-        pde_indvars,
-        pde_integration_vars)]
+                                   for (eq, pde_indvar) in zip(eqs, pde_indvars)]
 
     datafree_bc_loss_functions = [build_loss_function(pinnrep, bc, bc_indvar)
-                                  for (bc, bc_indvar, integration_indvar) in zip(bcs,
-        bc_indvars,
-        bc_integration_vars)]
+                                  for (bc, bc_indvar) in zip(bcs, bc_indvars)]
+
+    pde_loss_functions, bc_loss_functions = merge_strategy_with_loss_function(pinnrep,
+        strategy, datafree_pde_loss_functions, datafree_bc_loss_functions)
+
+    # setup for all adaptive losses
+    num_pde_losses = length(pde_loss_functions)
+    num_bc_losses = length(bc_loss_functions)
+    # assume one single additional loss function if there is one. this means that the user needs to lump all their functions into a single one,
+    num_additional_loss = convert(Int, additional_loss !== nothing)
 
-    function get_likelihood_estimate_function(discretization::PhysicsInformedNN)
-        pde_loss_functions, bc_loss_functions = merge_strategy_with_loss_function(pinnrep,
-            strategy,
-            datafree_pde_loss_functions,
-            datafree_bc_loss_functions)
-        # setup for all adaptive losses
-        num_pde_losses = length(pde_loss_functions)
-        num_bc_losses = length(bc_loss_functions)
-        # assume one single additional loss function if there is one. this means that the user needs to lump all their functions into a single one,
-        num_additional_loss = additional_loss isa Nothing ? 0 : 1
-
-        adaloss_T = eltype(adaloss.pde_loss_weights)
-
-        # this will error if the user has provided a number of initial weights that is more than 1 and doesn't match the number of loss functions
-        adaloss.pde_loss_weights = ones(adaloss_T, num_pde_losses) .*
-                                   adaloss.pde_loss_weights
-        adaloss.bc_loss_weights = ones(adaloss_T, num_bc_losses) .* adaloss.bc_loss_weights
-        adaloss.additional_loss_weights = ones(adaloss_T, num_additional_loss) .*
-                                          adaloss.additional_loss_weights
+    adaloss_T = eltype(adaloss.pde_loss_weights)
+
+    # this will error if the user has provided a number of initial weights that is more than 1 and doesn't match the number of loss functions
+    adaloss.pde_loss_weights = ones(adaloss_T, num_pde_losses) .* adaloss.pde_loss_weights
+    adaloss.bc_loss_weights = ones(adaloss_T, num_bc_losses) .* adaloss.bc_loss_weights
+    adaloss.additional_loss_weights = ones(adaloss_T, num_additional_loss) .*
+                                      adaloss.additional_loss_weights
 
     reweight_losses_func = generate_adaptive_loss_function(pinnrep, adaloss,
-        pde_loss_functions,
-        bc_loss_functions)
+        pde_loss_functions, bc_loss_functions)
 
+    function get_likelihood_estimate_function(::PhysicsInformedNN)
         function full_loss_function(θ, p)
             # the aggregation happens on cpu even if the losses are gpu, probably fine since it's only a few of them
             pde_losses = [pde_loss_function(θ) for pde_loss_function in pde_loss_functions]
@@ -543,13 +464,12 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem,
 
             # this is kind of a hack, and means that whenever the outer function is evaluated the increment goes up, even if it's not being optimized
             # that's why we prefer the user to maintain the increment in the outer loop callback during optimization
-            ChainRulesCore.@ignore_derivatives if self_increment
-                iteration[1] += 1
+            @ignore_derivatives if self_increment
+                iteration[] += 1
             end
 
-            ChainRulesCore.@ignore_derivatives begin
-                reweight_losses_func(θ, pde_losses,
-                    bc_losses)
+            @ignore_derivatives begin
+                reweight_losses_func(θ, pde_losses, bc_losses)
             end
 
             weighted_pde_losses = adaloss.pde_loss_weights .* pde_losses
@@ -563,126 +483,87 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem,
             full_weighted_loss = if additional_loss isa Nothing
                 weighted_loss_before_additional
             else
-                function _additional_loss(phi, θ)
-                    (θ_, p_) = if (param_estim == true)
-                        θ.depvar, θ.p
-                    else
-                        θ, nothing
-                    end
-                    return additional_loss(phi, θ_, p_)
-                end
+                (θ_, p_) = param_estim ? (θ.depvar, θ.p) : (θ, nothing)
+                _additional_loss = additional_loss(phi, θ_, p_)
                 weighted_additional_loss_val = adaloss.additional_loss_weights[1] *
-                                               _additional_loss(phi, θ)
+                                               _additional_loss
                 weighted_loss_before_additional + weighted_additional_loss_val
             end
 
-            ChainRulesCore.@ignore_derivatives begin
-                if iteration[1] % log_frequency == 0
+            @ignore_derivatives begin
+                if iteration[] % log_frequency == 0
                     logvector(pinnrep.logger, pde_losses, "unweighted_loss/pde_losses",
-                        iteration[1])
-                    logvector(pinnrep.logger,
-                        bc_losses,
-                        "unweighted_loss/bc_losses",
-                        iteration[1])
+                        iteration[])
+                    logvector(pinnrep.logger, bc_losses, "unweighted_loss/bc_losses",
+                        iteration[])
                     logvector(pinnrep.logger, weighted_pde_losses,
-                        "weighted_loss/weighted_pde_losses",
-                        iteration[1])
+                        "weighted_loss/weighted_pde_losses", iteration[])
                     logvector(pinnrep.logger, weighted_bc_losses,
-                        "weighted_loss/weighted_bc_losses",
-                        iteration[1])
-                    if !(additional_loss isa Nothing)
+                        "weighted_loss/weighted_bc_losses", iteration[])
+                    if additional_loss !== nothing
                         logscalar(pinnrep.logger, weighted_additional_loss_val,
-                            "weighted_loss/weighted_additional_loss", iteration[1])
+                            "weighted_loss/weighted_additional_loss", iteration[])
                     end
                     logscalar(pinnrep.logger, sum_weighted_pde_losses,
-                        "weighted_loss/sum_weighted_pde_losses", iteration[1])
+                        "weighted_loss/sum_weighted_pde_losses", iteration[])
                     logscalar(pinnrep.logger, sum_weighted_bc_losses,
-                        "weighted_loss/sum_weighted_bc_losses", iteration[1])
+                        "weighted_loss/sum_weighted_bc_losses", iteration[])
                     logscalar(pinnrep.logger, full_weighted_loss,
-                        "weighted_loss/full_weighted_loss",
-                        iteration[1])
+                        "weighted_loss/full_weighted_loss", iteration[])
                     logvector(pinnrep.logger, adaloss.pde_loss_weights,
-                        "adaptive_loss/pde_loss_weights",
-                        iteration[1])
+                        "adaptive_loss/pde_loss_weights", iteration[])
                     logvector(pinnrep.logger, adaloss.bc_loss_weights,
-                        "adaptive_loss/bc_loss_weights",
-                        iteration[1])
+                        "adaptive_loss/bc_loss_weights", iteration[])
                 end
             end
 
             return full_weighted_loss
         end
 
-        return bc_loss_functions, pde_loss_functions, full_loss_function
+        return full_loss_function
     end
 
     function get_likelihood_estimate_function(discretization::BayesianPINN)
-        # Because separate reweighting code section needed and loglikelihood is pointwise independent
-        pde_loss_functions, bc_loss_functions = merge_strategy_with_loglikelihood_function(
-            pinnrep,
-            strategy,
-            datafree_pde_loss_functions,
-            datafree_bc_loss_functions)
-
-        # setup for all adaptive losses
-        num_pde_losses = length(pde_loss_functions)
-        num_bc_losses = length(bc_loss_functions)
-        # assume one single additional loss function if there is one. this means that the user needs to lump all their functions into a single one,
-        num_additional_loss = additional_loss isa Nothing ? 0 : 1
-
-        adaloss_T = eltype(adaloss.pde_loss_weights)
-
-        # this will error if the user has provided a number of initial weights that is more than 1 and doesn't match the number of loss functions
-        adaloss.pde_loss_weights = ones(adaloss_T, num_pde_losses) .*
-                                   adaloss.pde_loss_weights
-        adaloss.bc_loss_weights = ones(adaloss_T, num_bc_losses) .* adaloss.bc_loss_weights
-        adaloss.additional_loss_weights = ones(adaloss_T, num_additional_loss) .*
-                                          adaloss.additional_loss_weights
-
-        reweight_losses_func = generate_adaptive_loss_function(pinnrep, adaloss,
-            pde_loss_functions,
-            bc_loss_functions)
-
         dataset_pde, dataset_bc = discretization.dataset
-        dataset_pde = dataset_pde isa Nothing ? dataset_pde : get_dataset_train_points(eqs, dataset_pde, pinnrep)
-        dataset_bc = dataset_bc isa Nothing ? dataset_bc : get_dataset_train_points(eqs, dataset_bc, pinnrep)
 
         # required as Physics loss also needed on the discrete dataset domain points
         # data points are discrete and so by default GridTraining loss applies
-        # passing placeholder dx with GridTraining, it uses dataset points irl
-        datapde_loss_functions, databc_loss_functions = merge_strategy_with_loglikelihood_function(
-                pinnrep,
-                GridTraining(0.1),
-                datafree_pde_loss_functions,
-                datafree_bc_loss_functions,
-                train_sets_pde = dataset_pde,
-                train_sets_bc = dataset_bc)
+        # passing placeholder dx with GridTraining, it uses data points irl
+        datapde_loss_functions, databc_loss_functions = if dataset_bc !== nothing ||
+                                                           dataset_pde !== nothing
+            merge_strategy_with_loglikelihood_function(pinnrep, GridTraining(0.1),
+                datafree_pde_loss_functions, datafree_bc_loss_functions,
+                train_sets_pde = dataset_pde, train_sets_bc = dataset_bc)
+        else
+            nothing, nothing
+        end
 
         function full_loss_function(θ, allstd::Vector{Vector{Float64}})
-            stdpdes, stdbcs, stdextra, stdpdesnew = allstd
+            stdpdes, stdbcs, stdextra = allstd
             # the aggregation happens on cpu even if the losses are gpu, probably fine since it's only a few of them
+            pde_loglikelihoods = [logpdf(Normal(0, stdpdes[i]), pde_loss_function(θ))
+                                  for (i, pde_loss_function) in enumerate(pde_loss_functions)]
 
-            pde_loglikelihoods = sum([pde_loss_function(θ, stdpdes[i])
-                                      for (i, pde_loss_function) in enumerate(pde_loss_functions)])
-            bc_loglikelihoods = sum([bc_loss_function(θ, stdbcs[j])
-                                     for (j, bc_loss_function) in enumerate(bc_loss_functions)])
+            bc_loglikelihoods = [logpdf(Normal(0, stdbcs[j]), bc_loss_function(θ))
+                                 for (j, bc_loss_function) in enumerate(bc_loss_functions)]
 
             if !(datapde_loss_functions isa Nothing)
-                pde_loglikelihoods += sum([datapde_loss_function(θ, stdpdes[i])
-                                           for (i, datapde_loss_function) in enumerate(datapde_loss_functions)])
+                pde_loglikelihoods += [logpdf(Normal(0, stdpdes[j]), pde_loss_function(θ))
+                                       for (j, pde_loss_function) in enumerate(datapde_loss_functions)]
             end
+
             if !(databc_loss_functions isa Nothing)
-                bc_loglikelihoods += sum([databc_loss_function(θ, stdbcs[j])
-                                          for (j, databc_loss_function) in enumerate(databc_loss_functions)])
+                bc_loglikelihoods += [logpdf(Normal(0, stdbcs[j]), bc_loss_function(θ))
+                                      for (j, bc_loss_function) in enumerate(databc_loss_functions)]
             end
 
             # this is kind of a hack, and means that whenever the outer function is evaluated the increment goes up, even if it's not being optimized
             # that's why we prefer the user to maintain the increment in the outer loop callback during optimization
-            ChainRulesCore.@ignore_derivatives if self_increment
-                iteration[1] += 1
+            @ignore_derivatives if self_increment
+                iteration[] += 1
             end
 
-            ChainRulesCore.@ignore_derivatives begin
+            @ignore_derivatives begin
                 reweight_losses_func(θ, pde_loglikelihoods,
                     bc_loglikelihoods)
             end
@@ -698,17 +579,9 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem,
             full_weighted_loglikelihood = if additional_loss isa Nothing
                 weighted_loglikelihood_before_additional
             else
-                function _additional_loss(phi, θ)
-                    (θ_, p_) = if (param_estim == true)
-                        θ.depvar, θ.p
-                    else
-                        θ, nothing
-                    end
-                    return additional_loss(phi, θ_, p_)
-                end
-
-                _additional_loglikelihood = logpdf(Normal(0, stdextra),
-                    _additional_loss(phi, θ))
+                (θ_, p_) = param_estim ? (θ.depvar, θ.p) : (θ, nothing)
+                _additional_loss = additional_loss(phi, θ_, p_)
+                _additional_loglikelihood = logpdf(Normal(0, stdextra), _additional_loss)
 
                 weighted_additional_loglikelihood = adaloss.additional_loss_weights[1] *
                                                     _additional_loglikelihood
@@ -719,15 +592,13 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem,
             return full_weighted_loglikelihood
         end
 
-        return bc_loss_functions, pde_loss_functions, full_loss_function
+        return full_loss_function
     end
 
-    bc_loss_functions, pde_loss_functions, full_loss_function = get_likelihood_estimate_function(discretization)
-
+    full_loss_function = get_likelihood_estimate_function(discretization)
     pinnrep.loss_functions = PINNLossFunctions(bc_loss_functions, pde_loss_functions,
-                                                full_loss_function, additional_loss,
-                                                datafree_pde_loss_functions,
-                                                datafree_bc_loss_functions)
+        full_loss_function, additional_loss, datafree_pde_loss_functions,
+        datafree_bc_loss_functions)
 
     return pinnrep
 end
@@ -736,12 +607,11 @@ end
     prob = discretize(pde_system::PDESystem, discretization::PhysicsInformedNN)
 
 Transforms a symbolic description of a ModelingToolkit-defined `PDESystem` and generates
-an `OptimizationProblem` for [Optimization.jl](https://docs.sciml.ai/Optimization/stable/) whose
-solution is the solution to the PDE.
+an `OptimizationProblem` for [Optimization.jl](https://docs.sciml.ai/Optimization/stable/)
+whose solution is the solution to the PDE.
 """
 function SciMLBase.discretize(pde_system::PDESystem, discretization::PhysicsInformedNN)
     pinnrep = symbolic_discretize(pde_system, discretization)
-    f = OptimizationFunction(pinnrep.loss_functions.full_loss_function,
-        Optimization.AutoZygote())
-    Optimization.OptimizationProblem(f, pinnrep.flat_init_params)
-end
+    f = OptimizationFunction(pinnrep.loss_functions.full_loss_function, AutoZygote())
+    return Optimization.OptimizationProblem(f, pinnrep.flat_init_params)
+end
\ No newline at end of file

From b60a19212ddd763772fc9557288065baa89bf1e5 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Wed, 27 Sep 2023 16:07:50 +0530
Subject: [PATCH 059/107] Better Posterior Formulation

---
 src/BPINN_ode.jl        |   1 +
 src/advancedHMC_MCMC.jl | 310 ++++++++++++++++++++++++++++++++++
 test/BPINN_tests.jl     | 366 ----------------------------------------
 3 files changed, 311 insertions(+), 366 deletions(-)

diff --git a/src/BPINN_ode.jl b/src/BPINN_ode.jl
index f65f1d659e..9e137fb5d8 100644
--- a/src/BPINN_ode.jl
+++ b/src/BPINN_ode.jl
@@ -214,6 +214,7 @@ function SciMLBase.__solve(prob::SciMLBase.ODEProblem, alg::BNNODE, args...; dt
         push!(ensemblecurves, ensemblecurve)
     end
 
+    # estimated using all samples
     nnparams = length(θinit)
     estimnnparams = [Particles(reduce(hcat, samples[(end - numensemble):end])[i, :])
                      for i in 1:nnparams]
diff --git a/src/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl
index 380d284f55..1f0189776f 100644
--- a/src/advancedHMC_MCMC.jl
+++ b/src/advancedHMC_MCMC.jl
@@ -68,6 +68,311 @@ function LogDensityProblems.capabilities(::LogTargetDensity)
     return LogDensityProblems.LogDensityOrder{1}()
 end
 
+# suggested extra loss function
+function L2loss2(Tar::LogTargetDensity, θ)
+    f = Tar.prob.f
+
+    # parameter estimation chosen or not
+    if Tar.extraparams > 0
+        dataset, deri_sol = Tar.dataset
+        # deri_sol = deri_sol'
+        autodiff = Tar.autodiff
+
+        # # Timepoints to enforce Physics
+        # dataset = Array(reduce(hcat, dataset)')
+        # t = dataset[end, :]
+        # û = dataset[1:(end - 1), :]
+
+        # ode_params = Tar.extraparams == 1 ?
+        #              θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
+        #              θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
+
+        # if length(û[:, 1]) == 1
+        #     physsol = [f(û[:, i][1],
+        #         ode_params,
+        #         t[i])
+        #                for i in 1:length(û[1, :])]
+        # else
+        #     physsol = [f(û[:, i],
+        #         ode_params,
+        #         t[i])
+        #                for i in 1:length(û[1, :])]
+        # end
+        # #form of NN output matrix output dim x n
+        # deri_physsol = reduce(hcat, physsol)
+
+        # > for perfect deriv(basically gradient matching in case of an ODEFunction)
+        # in case of PDE or general ODE we would want to reduce residue of f(du,u,p,t)
+        # if length(û[:, 1]) == 1
+        #     deri_sol = [f(û[:, i][1],
+        #         Tar.prob.p,
+        #         t[i])
+        #                 for i in 1:length(û[1, :])]
+        # else
+        #     deri_sol = [f(û[:, i],
+        #         Tar.prob.p,
+        #         t[i])
+        #                 for i in 1:length(û[1, :])]
+        # end
+        # deri_sol = reduce(hcat, deri_sol) 
+        # deri_sol = reduce(hcat, derivatives)
+
+        # Timepoints to enforce Physics 
+        t = dataset[end]
+        u1 = dataset[2]
+        û = dataset[1]
+        # Tar(t, θ[1:(length(θ) - Tar.extraparams)])'
+        #  
+
+        nnsol = NNodederi(Tar, t, θ[1:(length(θ) - Tar.extraparams)], autodiff)
+
+        ode_params = Tar.extraparams == 1 ?
+                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
+                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
+
+        if length(Tar.prob.u0) == 1
+            physsol = [f(û[i],
+                ode_params,
+                t[i])
+                       for i in 1:length(û[:, 1])]
+        else
+            physsol = [f([û[i], u1[i]],
+                ode_params,
+                t[i])
+                       for i in 1:length(û[:, 1])]
+        end
+        #form of NN output matrix output dim x n 
+        deri_physsol = reduce(hcat, physsol)
+
+        # if length(Tar.prob.u0) == 1
+        #     nnsol = [f(û[i],
+        #         Tar.prob.p,
+        #         t[i])
+        #              for i in 1:length(û[:, 1])]
+        # else
+        #     nnsol = [f([û[i], u1[i]],
+        #         Tar.prob.p,
+        #         t[i])
+        #              for i in 1:length(û[:, 1])]
+        # end
+        # form of NN output matrix output dim x n
+        # nnsol = reduce(hcat, nnsol)
+
+        # > Instead of dataset gradients trying NN derivatives with dataset collocation 
+        # # convert to matrix as nnsol  
+
+        physlogprob = 0
+        for i in 1:length(Tar.prob.u0)
+            # can add phystd[i] for u[i] 
+            physlogprob += logpdf(MvNormal(deri_physsol[i, :],
+                    LinearAlgebra.Diagonal(map(abs2,
+                        (Tar.l2std[i] * 4.0) .*
+                        ones(length(nnsol[i, :]))))),
+                nnsol[i, :])
+        end
+        return physlogprob
+    else
+        return 0
+    end
+end
+
+# PDE(DU,U,P,T)=0
+
+# Derivated via Central Diff
+# function calculate_derivatives2(dataset)
+#     x̂, time = dataset
+#     num_points = length(x̂)
+#     # Initialize an array to store the derivative values.
+#     derivatives = similar(x̂)
+
+#     for i in 2:(num_points - 1)
+#         # Calculate the first-order derivative using central differences.
+#         Δt_forward = time[i + 1] - time[i]
+#         Δt_backward = time[i] - time[i - 1]
+
+#         derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
+
+#         derivatives[i] = derivative
+#     end
+
+#     # Derivatives at the endpoints can be calculated using forward or backward differences.
+#     derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
+#     derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
+#     return derivatives
+# end
+
+function calderivatives(prob, dataset)
+    chainflux = Flux.Chain(Flux.Dense(1, 8, tanh), Flux.Dense(8, 8, tanh),
+        Flux.Dense(8, 2)) |> Flux.f64
+    # chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64
+    function loss(x, y)
+        # sum(Flux.mse.(prob.u0[1] .+ (prob.tspan[2] .- x)' .* chainflux(x)[1, :], y[1]) +
+        #     Flux.mse.(prob.u0[2] .+ (prob.tspan[2] .- x)' .* chainflux(x)[2, :], y[2]))
+        # sum(Flux.mse.(prob.u0[1] .+ (prob.tspan[2] .- x)' .* chainflux(x)[1, :], y[1]))
+        sum(Flux.mse.(chainflux(x), y))
+    end
+    optimizer = Flux.Optimise.ADAM(0.01)
+    epochs = 3000
+    for epoch in 1:epochs
+        Flux.train!(loss,
+            Flux.params(chainflux),
+            [(dataset[end]', dataset[1:(end - 1)])],
+            optimizer)
+    end
+
+    # A1 = (prob.u0' .+
+    #   (prob.tspan[2] .- (dataset[end]' .+ sqrt(eps(eltype(Float64)))))' .*
+    #   chainflux(dataset[end]' .+ sqrt(eps(eltype(Float64))))')
+
+    # A2 = (prob.u0' .+
+    #       (prob.tspan[2] .- (dataset[end]'))' .*
+    #       chainflux(dataset[end]')')
+
+    A1 = chainflux(dataset[end]' .+ sqrt(eps(eltype(dataset[end][1]))))
+    A2 = chainflux(dataset[end]')
+
+    gradients = (A2 .- A1) ./ sqrt(eps(eltype(dataset[end][1])))
+
+    return gradients
+end
+
+
+# suggested extra loss function
+function L2loss2(Tar::LogTargetDensity, θ)
+    f = Tar.prob.f
+
+    # parameter estimation chosen or not
+    if Tar.extraparams > 0
+        dataset, deri_sol = Tar.dataset
+        # deri_sol = deri_sol'
+        autodiff = Tar.autodiff
+
+        # # Timepoints to enforce Physics
+        # dataset = Array(reduce(hcat, dataset)')
+        # t = dataset[end, :]
+        # û = dataset[1:(end - 1), :]
+
+        # ode_params = Tar.extraparams == 1 ?
+        #              θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
+        #              θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
+
+        # if length(û[:, 1]) == 1
+        #     physsol = [f(û[:, i][1],
+        #         ode_params,
+        #         t[i])
+        #                for i in 1:length(û[1, :])]
+        # else
+        #     physsol = [f(û[:, i],
+        #         ode_params,
+        #         t[i])
+        #                for i in 1:length(û[1, :])]
+        # end
+        # #form of NN output matrix output dim x n
+        # deri_physsol = reduce(hcat, physsol)
+
+        # > for perfect deriv(basically gradient matching in case of an ODEFunction)
+        # in case of PDE or general ODE we would want to reduce residue of f(du,u,p,t)
+        # if length(û[:, 1]) == 1
+        #     deri_sol = [f(û[:, i][1],
+        #         Tar.prob.p,
+        #         t[i])
+        #                 for i in 1:length(û[1, :])]
+        # else
+        #     deri_sol = [f(û[:, i],
+        #         Tar.prob.p,
+        #         t[i])
+        #                 for i in 1:length(û[1, :])]
+        # end
+        # deri_sol = reduce(hcat, deri_sol) 
+        # deri_sol = reduce(hcat, derivatives)
+
+        # Timepoints to enforce Physics 
+        t = dataset[end]
+        u1 = dataset[2]
+        û = dataset[1]
+        # Tar(t, θ[1:(length(θ) - Tar.extraparams)])'
+        #  
+
+        nnsol = NNodederi(Tar, t, θ[1:(length(θ) - Tar.extraparams)], autodiff)
+
+        ode_params = Tar.extraparams == 1 ?
+                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
+                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
+
+        if length(Tar.prob.u0) == 1
+            physsol = [f(û[i],
+                ode_params,
+                t[i])
+                       for i in 1:length(û[:, 1])]
+        else
+            physsol = [f([û[i], u1[i]],
+                ode_params,
+                t[i])
+                       for i in 1:length(û[:, 1])]
+        end
+        #form of NN output matrix output dim x n 
+        deri_physsol = reduce(hcat, physsol)
+
+        # if length(Tar.prob.u0) == 1
+        #     nnsol = [f(û[i],
+        #         Tar.prob.p,
+        #         t[i])
+        #              for i in 1:length(û[:, 1])]
+        # else
+        #     nnsol = [f([û[i], u1[i]],
+        #         Tar.prob.p,
+        #         t[i])
+        #              for i in 1:length(û[:, 1])]
+        # end
+        # form of NN output matrix output dim x n
+        # nnsol = reduce(hcat, nnsol)
+
+        # > Instead of dataset gradients trying NN derivatives with dataset collocation 
+        # # convert to matrix as nnsol  
+
+        physlogprob = 0
+        for i in 1:length(Tar.prob.u0)
+            # can add phystd[i] for u[i] 
+            physlogprob += logpdf(MvNormal(deri_physsol[i, :],
+                    LinearAlgebra.Diagonal(map(abs2,
+                        (Tar.l2std[i] * 4.0) .*
+                        ones(length(nnsol[i, :]))))),
+                nnsol[i, :])
+        end
+        return physlogprob
+    else
+        return 0
+    end
+end
+
+
+function calculate_derivatives(dataset)
+
+    # u = dataset[1]
+    # u1 = dataset[2]
+    # t = dataset[end]
+    # # control points
+    # n = Int(floor(length(t) / 10))
+    # # spline for datasetvalues(solution) 
+    # # interp = BSplineApprox(u, t, 4, 10, :Uniform, :Uniform)
+    # interp = CubicSpline(u, t)
+    # interp1 = CubicSpline(u1, t)
+    # # derrivatives interpolation
+    # dx = t[2] - t[1]
+    # time = collect(t[1]:dx:t[end])
+    # smoothu = [interp(i) for i in time]
+    # smoothu1 = [interp1(i) for i in time]
+    # # derivative of the spline (must match function derivative) 
+    # û = tvdiff(smoothu, 20, 0.5, dx = dx, ε = 1)
+    # û1 = tvdiff(smoothu1, 20, 0.5, dx = dx, ε = 1)
+    # # tvdiff(smoothu, 100, 0.035, dx = dx, ε = 1)
+    # # FDM
+    # # û1 = diff(u) / dx
+    # # dataset[1] and smoothu are almost equal(rounding errors)
+    # return [û, û1] 
+
+end
+
 """
 suggested extra loss function for ODE solver case
 """
@@ -440,6 +745,11 @@ function ahmc_bayesian_pinn_ode(
     metric = Metric(nparameters)
     hamiltonian = Hamiltonian(metric, ℓπ, ForwardDiff)
 
+    println("physics Logpdf is : ", physloglikelihood(ℓπ, initial_θ))
+    println("prior Logpdf is : ", priorweights(ℓπ, initial_θ))
+    println("L2lossData Logpdf is : ", L2LossData(ℓπ, initial_θ))
+    println("L2loss2 Logpdf is : ", L2loss2(ℓπ, initial_θ))
+
     # parallel sampling option
     if nchains != 1
         # Cache to store the chains
diff --git a/test/BPINN_tests.jl b/test/BPINN_tests.jl
index 7f1df5691a..e69de29bb2 100644
--- a/test/BPINN_tests.jl
+++ b/test/BPINN_tests.jl
@@ -1,366 +0,0 @@
-@testitem "BPINN ODE I: Without Param Estimation" tags=[:odebpinn] begin
-    using MCMCChains, Distributions, OrdinaryDiffEq, OptimizationOptimisers, Lux,
-          AdvancedHMC, Statistics, Random, Functors, ComponentArrays, MonteCarloMeasurements
-    import Flux
-
-    Random.seed!(100)
-
-    linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π)
-    linear = (u, p, t) -> cos(2 * π * t)
-    tspan = (0.0, 2.0)
-    u0 = 0.0
-    prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan)
-    p = prob.p
-
-    # Numerical and Analytical Solutions: testing ahmc_bayesian_pinn_ode()
-    ta = range(tspan[1], tspan[2], length = 300)
-    u = [linear_analytic(u0, nothing, ti) for ti in ta]
-    x̂ = collect(Float64, Array(u) + 0.02 * randn(size(u)))
-    time = vec(collect(Float64, ta))
-    physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-    # testing points for solve() call must match saveat(1/50.0) arg
-    ta0 = range(tspan[1], tspan[2], length = 101)
-    u1 = [linear_analytic(u0, nothing, ti) for ti in ta0]
-    x̂1 = collect(Float64, Array(u1) + 0.02 * randn(size(u1)))
-    time1 = vec(collect(Float64, ta0))
-    physsol0_1 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
-
-    chainlux = Chain(Dense(1, 7, tanh), Dense(7, 1))
-    θinit, st = Lux.setup(Random.default_rng(), chainlux)
-
-    fh_mcmc_chain, fhsamples, fhstats = ahmc_bayesian_pinn_ode(
-        prob, chainlux, draw_samples = 2500)
-
-    alg = BNNODE(chainlux, draw_samples = 2500)
-    sol1lux = solve(prob, alg)
-
-    # testing points
-    t = time
-    # Mean of last 500 sampled parameter's curves[Ensemble predictions]
-    θ = [vector_to_parameters(fhsamples[i], θinit) for i in 2000:length(fhsamples)]
-    luxar = [chainlux(t', θ[i], st)[1] for i in eachindex(θ)]
-    luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
-    meanscurve = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
-
-    # --------------------- ahmc_bayesian_pinn_ode() call
-    @test mean(abs.(x̂ .- meanscurve)) < 0.05
-    @test mean(abs.(physsol1 .- meanscurve)) < 0.005
-
-    #--------------------- solve() call
-    @test mean(abs.(x̂1 .- pmean(sol1lux.ensemblesol[1]))) < 0.025
-    @test mean(abs.(physsol0_1 .- pmean(sol1lux.ensemblesol[1]))) < 0.025
-end
-
-@testitem "BPINN ODE II: With Parameter Estimation" tags=[:odebpinn] begin
-    using MCMCChains, Distributions, OrdinaryDiffEq, OptimizationOptimisers, Lux,
-          AdvancedHMC, Statistics, Random, Functors, ComponentArrays, MonteCarloMeasurements
-    import Flux
-
-    Random.seed!(100)
-
-    linear_analytic = (u0, p, t) -> u0 + sin(p * t) / (p)
-    linear = (u, p, t) -> cos(p * t)
-    tspan = (0.0, 2.0)
-    u0 = 0.0
-    p = 2 * pi
-    prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan, p)
-
-    # Numerical and Analytical Solutions
-    sol1 = solve(prob, Tsit5(); saveat = 0.01)
-    u = sol1.u
-    time = sol1.t
-
-    # BPINN AND TRAINING DATASET CREATION(dataset must be defined only inside problem timespan!)
-    ta = range(tspan[1], tspan[2], length = 100)
-    u = [linear_analytic(u0, p, ti) for ti in ta]
-    x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-    time = vec(collect(Float64, ta))
-    dataset = [x̂, time]
-    physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-    # testing points for solve call(saveat=1/50.0 ∴ at t = collect(eltype(saveat), prob.tspan[1]:saveat:prob.tspan[2] internally estimates)
-    ta0 = range(tspan[1], tspan[2], length = 101)
-    u1 = [linear_analytic(u0, p, ti) for ti in ta0]
-    x̂1 = collect(Float64, Array(u1) + 0.2 * randn(size(u1)))
-    time1 = vec(collect(Float64, ta0))
-    physsol1_1 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
-
-    chainlux1 = Chain(Dense(1, 7, tanh), Dense(7, 1))
-    θinit, st = Lux.setup(Random.default_rng(), chainlux1)
-
-    fh_mcmc_chain, fhsamples, fhstats = ahmc_bayesian_pinn_ode(
-        prob, chainlux1, dataset = dataset, draw_samples = 2500,
-        physdt = 1 / 50.0, priorsNNw = (0.0, 3.0), param = [LogNormal(9, 0.5)])
-
-    alg = BNNODE(chainlux1, dataset = dataset, draw_samples = 2500, physdt = 1 / 50.0,
-        priorsNNw = (0.0, 3.0), param = [LogNormal(9, 0.5)])
-
-    sol2lux = solve(prob, alg)
-
-    # testing points
-    t = time
-    # Mean of last 500 sampled parameter's curves(flux and lux chains)[Ensemble predictions]
-    θ = [vector_to_parameters(fhsamples[i][1:(end - 1)], θinit)
-         for i in 2000:length(fhsamples)]
-    luxar = [chainlux1(t', θ[i], st)[1] for i in eachindex(θ)]
-    luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
-    meanscurve = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
-
-    # --------------------- ahmc_bayesian_pinn_ode() call
-    @test mean(abs.(physsol1 .- meanscurve)) < 0.15
-
-    # ESTIMATED ODE PARAMETERS (NN1 AND NN2)
-    @test abs(p - mean([fhsamples[i][23] for i in 2000:length(fhsamples)])) < abs(0.35 * p)
-
-    #-------------------------- solve() call
-    @test mean(abs.(physsol1_1 .- pmean(sol2lux.ensemblesol[1]))) < 8e-2
-
-    # ESTIMATED ODE PARAMETERS (NN1 AND NN2)
-    @test abs(p - sol2lux.estimated_de_params[1]) < abs(0.15 * p)
-end
-
-@testitem "BPINN ODE III" tags=[:odebpinn] begin
-    using MCMCChains, Distributions, OrdinaryDiffEq, OptimizationOptimisers, Lux,
-          AdvancedHMC, Statistics, Random, Functors, ComponentArrays, MonteCarloMeasurements
-    import Flux
-
-    Random.seed!(100)
-
-    linear = (u, p, t) -> u / p + exp(t / p) * cos(t)
-    tspan = (0.0, 10.0)
-    u0 = 0.0
-    p = -5.0
-    prob = ODEProblem(linear, u0, tspan, p)
-    linear_analytic = (u0, p, t) -> exp(t / p) * (u0 + sin(t))
-    # SOLUTION AND CREATE DATASET
-    sol = solve(prob, Tsit5(); saveat = 0.1)
-    u = sol.u
-    time = sol.t
-    x̂ = u .+ (u .* 0.2) .* randn(size(u))
-    dataset = [x̂, time]
-    physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-    # separate set of points for testing the solve() call (it uses saveat 1/50 hence here length 501)
-    time1 = vec(collect(Float64, range(tspan[1], tspan[2], length = 501)))
-    physsol2 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
-
-    chainlux12 = Chain(Dense(1, 6, tanh), Dense(6, 6, tanh), Dense(6, 1))
-    θinit, st = Lux.setup(Random.default_rng(), chainlux12)
-
-    fh_mcmc_chainlux12, fhsampleslux12, fhstatslux12 = ahmc_bayesian_pinn_ode(
-        prob, chainlux12, draw_samples = 1500, l2std = [0.03],
-        phystd = [0.03], priorsNNw = (0.0, 10.0))
-
-    fh_mcmc_chainlux22, fhsampleslux22, fhstatslux22 = ahmc_bayesian_pinn_ode(
-        prob, chainlux12, dataset = dataset, draw_samples = 1500, l2std = [0.03],
-        phystd = [0.03], priorsNNw = (0.0, 10.0), param = [Normal(-7, 4)])
-
-    alg = BNNODE(chainlux12, dataset = dataset, draw_samples = 1500, l2std = [0.03],
-        phystd = [0.03], priorsNNw = (0.0, 10.0), param = [Normal(-7, 4)])
-
-    sol3lux_pestim = solve(prob, alg)
-
-    # testing timepoints
-    t = sol.t
-    #------------------------------ ahmc_bayesian_pinn_ode() call
-    # Mean of last 500 sampled parameter's curves(lux chains)[Ensemble predictions]
-    θ = [vector_to_parameters(fhsampleslux12[i], θinit)
-         for i in 1000:length(fhsampleslux12)]
-    luxar = [chainlux12(t', θ[i], st)[1] for i in eachindex(θ)]
-    luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
-    meanscurve2_1 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
-
-    θ = [vector_to_parameters(fhsampleslux22[i][1:(end - 1)], θinit)
-         for i in 1000:length(fhsampleslux22)]
-    luxar = [chainlux12(t', θ[i], st)[1] for i in eachindex(θ)]
-    luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
-    meanscurve2_2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
-
-    @test mean(abs, sol.u .- meanscurve2_1) < 1e-1
-    @test mean(abs, physsol1 .- meanscurve2_1) < 1e-1
-    @test mean(abs, sol.u .- meanscurve2_2) < 5e-2
-    @test mean(abs, physsol1 .- meanscurve2_2) < 5e-2
-
-    # estimated parameters(lux chain)
-    param1 = mean(i[62] for i in fhsampleslux22[1000:length(fhsampleslux22)])
-    @test abs(param1 - p) < abs(0.3 * p)
-
-    #-------------------------- solve() call
-    # (lux chain)
-    @test mean(abs, physsol2 .- pmean(sol3lux_pestim.ensemblesol[1])) < 0.15
-    # estimated parameters(lux chain)
-    param1 = sol3lux_pestim.estimated_de_params[1]
-    @test abs(param1 - p) < abs(0.45 * p)
-end
-
-@testitem "BPINN ODE: Translating from Flux" tags=[:odebpinn] begin
-    using MCMCChains, Distributions, OrdinaryDiffEq, OptimizationOptimisers, Lux,
-          AdvancedHMC, Statistics, Random, Functors, ComponentArrays, MonteCarloMeasurements
-    import Flux
-
-    Random.seed!(100)
-
-    linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π)
-    linear = (u, p, t) -> cos(2 * π * t)
-    tspan = (0.0, 2.0)
-    u0 = 0.0
-    prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan)
-    p = prob.p
-
-    # Numerical and Analytical Solutions: testing ahmc_bayesian_pinn_ode()
-    ta = range(tspan[1], tspan[2], length = 300)
-    u = [linear_analytic(u0, nothing, ti) for ti in ta]
-    x̂ = collect(Float64, Array(u) + 0.02 * randn(size(u)))
-    time = vec(collect(Float64, ta))
-    physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-    # testing points for solve() call must match saveat(1/50.0) arg
-    ta0 = range(tspan[1], tspan[2], length = 101)
-    u1 = [linear_analytic(u0, nothing, ti) for ti in ta0]
-    x̂1 = collect(Float64, Array(u1) + 0.02 * randn(size(u1)))
-    time1 = vec(collect(Float64, ta0))
-    physsol0_1 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
-    chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64
-    fh_mcmc_chain, fhsamples, fhstats = ahmc_bayesian_pinn_ode(
-        prob, chainflux, draw_samples = 2500)
-    alg = BNNODE(chainflux, draw_samples = 2500)
-    @test alg.chain isa AbstractLuxLayer
-end
-
-@testitem "BPINN ODE III: with the new objective" tags=[:odebpinn] begin
-    using MCMCChains, Distributions, OrdinaryDiffEq, OptimizationOptimisers, Lux,
-          AdvancedHMC, Statistics, Random, Functors, ComponentArrays, MonteCarloMeasurements
-    import Flux
-
-    Random.seed!(100)
-
-    linear = (u, p, t) -> u / p + exp(t / p) * cos(t)
-    tspan = (0.0, 10.0)
-    u0 = 0.0
-    p = -5.0
-    prob = ODEProblem(linear, u0, tspan, p)
-    linear_analytic = (u0, p, t) -> exp(t / p) * (u0 + sin(t))
-
-    # SOLUTION AND CREATE DATASET
-    sol = solve(prob, Tsit5(); saveat = 0.1)
-    u = sol.u
-    time = sol.t
-    x̂ = u .+ (0.3 .* randn(size(u)))
-    dataset = [x̂, time]
-    physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-    # separate set of points for testing the solve() call (it uses saveat 1/50 hence here length 501)
-    time1 = vec(collect(Float64, range(tspan[1], tspan[2], length = 501)))
-    physsol2 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
-
-    chainlux12 = Chain(Dense(1, 6, tanh), Dense(6, 6, tanh), Dense(6, 1))
-    θinit, st = Lux.setup(Random.default_rng(), chainlux12)
-
-    fh_mcmc_chainlux12, fhsampleslux12, fhstatslux12 = ahmc_bayesian_pinn_ode(
-        prob, chainlux12, dataset = dataset, draw_samples = 1000, l2std = [0.1],
-        phystd = [0.03], priorsNNw = (0.0, 1.0), param = [Normal(-7, 3)])
-
-    fh_mcmc_chainlux22, fhsampleslux22, fhstatslux22 = ahmc_bayesian_pinn_ode(
-        prob, chainlux12, dataset = dataset, draw_samples = 1000,
-        l2std = [0.1], phystd = [0.03], priorsNNw = (0.0, 1.0),
-        param = [Normal(-7, 3)], estim_collocate = true)
-
-    alg = BNNODE(
-        chainlux12, dataset = dataset, draw_samples = 1000, l2std = [0.1], phystd = [0.03],
-        priorsNNw = (0.0, 1.0), param = [Normal(-7, 3)], estim_collocate = true)
-
-    sol3lux_pestim = solve(prob, alg)
-
-    # testing timepoints
-    t = sol.t
-    #------------------------------ ahmc_bayesian_pinn_ode() call
-    # Mean of last 500 sampled parameter's curves(lux chains)[Ensemble predictions]
-    θ = [vector_to_parameters(fhsampleslux12[i][1:(end - 1)], θinit)
-         for i in 750:length(fhsampleslux12)]
-    luxar = [chainlux12(t', θ[i], st)[1] for i in eachindex(θ)]
-    luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
-    meanscurve2_1 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
-
-    θ = [vector_to_parameters(fhsampleslux22[i][1:(end - 1)], θinit)
-         for i in 750:length(fhsampleslux22)]
-    luxar = [chainlux12(t', θ[i], st)[1] for i in eachindex(θ)]
-    luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
-    meanscurve2_2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
-
-    @test_broken mean(abs.(sol.u .- meanscurve2_2)) < 6e-2
-    @test_broken mean(abs.(physsol1 .- meanscurve2_2)) < 6e-2
-    @test mean(abs.(sol.u .- meanscurve2_1)) > mean(abs.(sol.u .- meanscurve2_2))
-    @test mean(abs.(physsol1 .- meanscurve2_1)) > mean(abs.(physsol1 .- meanscurve2_2))
-
-    # estimated parameters(lux chain)
-    param2 = mean(i[62] for i in fhsampleslux22[750:length(fhsampleslux22)])
-    @test_broken abs(param2 - p) < abs(0.25 * p)
-
-    param1 = mean(i[62] for i in fhsampleslux12[750:length(fhsampleslux12)])
-    @test abs(param1 - p) < abs(0.8 * p)
-    @test abs(param2 - p) < abs(param1 - p)
-
-    #-------------------------- solve() call
-    # (lux chain)
-    @test_broken mean(abs.(physsol2 .- pmean(sol3lux_pestim.ensemblesol[1]))) < 0.1
-    # estimated parameters(lux chain)
-    param3 = sol3lux_pestim.estimated_de_params[1]
-    @test_broken abs(param3 - p) < abs(0.2 * p)
-end
-
-@testitem "BPINN ODE IV: Improvement" tags=[:odebpinn] begin
-    using MCMCChains, Distributions, OrdinaryDiffEq, OptimizationOptimisers, Lux,
-          AdvancedHMC, Statistics, Random, Functors, ComponentArrays, MonteCarloMeasurements
-    import Flux
-
-    Random.seed!(100)
-
-    function lotka_volterra(u, p, t)
-        # Model parameters.
-        α, β, γ, δ = p
-        # Current state.
-        x, y = u
-
-        # Evaluate differential equations.
-        dx = (α - β * y) * x # prey
-        dy = (δ * x - γ) * y # predator
-
-        return [dx, dy]
-    end
-
-    # initial-value problem.
-    u0 = [1.0, 1.0]
-    p = [1.5, 1.0, 3.0, 1.0]
-    tspan = (0.0, 4.0)
-    prob = ODEProblem(lotka_volterra, u0, tspan, p)
-
-    # Solve using OrdinaryDiffEq.jl solver
-    dt = 0.2
-    solution = solve(prob, Tsit5(); saveat = dt)
-
-    times = solution.t
-    u = hcat(solution.u...)
-    x = u[1, :] + (0.8 .* randn(length(u[1, :])))
-    y = u[2, :] + (0.8 .* randn(length(u[2, :])))
-    dataset = [x, y, times]
-
-    chain = Chain(Dense(1, 6, tanh), Dense(6, 6, tanh), Dense(6, 2))
-
-    alg1 = BNNODE(chain; dataset = dataset, draw_samples = 1000,
-        l2std = [0.2, 0.2], phystd = [0.1, 0.1], priorsNNw = (0.0, 1.0),
-        param = [Normal(2, 0.5), Normal(2, 0.5), Normal(2, 0.5), Normal(2, 0.5)])
-
-    alg2 = BNNODE(chain; dataset = dataset, draw_samples = 1000,
-        l2std = [0.2, 0.2], phystd = [0.1, 0.1], priorsNNw = (0.0, 1.0),
-        param = [Normal(2, 0.5), Normal(2, 0.5), Normal(2, 0.5), Normal(2, 0.5)],
-        estim_collocate = true)
-
-    @time sol_pestim1 = solve(prob, alg1; saveat = dt)
-    @time sol_pestim2 = solve(prob, alg2; saveat = dt)
-
-    unsafe_comparisons(true)
-    bitvec = abs.(p .- sol_pestim1.estimated_de_params) .>
-             abs.(p .- sol_pestim2.estimated_de_params)
-    @test_broken bitvec == ones(size(bitvec))
-end

From 11b8dfcca59d148b4bf0085de7127d45e1e80447 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Sat, 20 Jan 2024 23:50:27 +0530
Subject: [PATCH 060/107] trying to sync

---
 src/BNNODE_new.jl       |  794 +++++++
 src/BPINN_ode.jl        |    1 -
 src/advancedHMC_MCMC.jl |    5 -
 test/BPINN_newform.jl   | 4354 +++++++++++++++++++++++++++++++++++++++
 test/BPINN_tests.jl     |    0
 5 files changed, 5148 insertions(+), 6 deletions(-)
 create mode 100644 src/BNNODE_new.jl
 create mode 100644 test/BPINN_newform.jl
 delete mode 100644 test/BPINN_tests.jl

diff --git a/src/BNNODE_new.jl b/src/BNNODE_new.jl
new file mode 100644
index 0000000000..e6b1f24faa
--- /dev/null
+++ b/src/BNNODE_new.jl
@@ -0,0 +1,794 @@
+mutable struct LogTargetDensity{C, S, ST <: AbstractTrainingStrategy, I,
+    P <: Vector{<:Distribution},
+    D <:
+    Union{Vector{Nothing}, Vector{<:Vector{<:AbstractFloat}}},
+}
+    dim::Int
+    prob::DiffEqBase.ODEProblem
+    chain::C
+    st::S
+    strategy::ST
+    dataset::D
+    priors::P
+    phystd::Vector{Float64}
+    l2std::Vector{Float64}
+    autodiff::Bool
+    physdt::Float64
+    extraparams::Int
+    init_params::I
+
+    function LogTargetDensity(dim, prob, chain::Optimisers.Restructure, st, strategy,
+        dataset,
+        priors, phystd, l2std, autodiff, physdt, extraparams,
+        init_params::AbstractVector)
+        new{
+            typeof(chain),
+            Nothing,
+            typeof(strategy),
+            typeof(init_params),
+            typeof(priors),
+            typeof(dataset),
+        }(dim,
+            prob,
+            chain,
+            nothing, strategy,
+            dataset,
+            priors,
+            phystd,
+            l2std,
+            autodiff,
+            physdt,
+            extraparams,
+            init_params)
+    end
+    function LogTargetDensity(dim, prob, chain::Lux.AbstractExplicitLayer, st, strategy,
+        dataset,
+        priors, phystd, l2std, autodiff, physdt, extraparams,
+        init_params::NamedTuple)
+        new{
+            typeof(chain),
+            typeof(st),
+            typeof(strategy),
+            typeof(init_params),
+            typeof(priors),
+            typeof(dataset),
+        }(dim,
+            prob,
+            chain, st, strategy,
+            dataset, priors,
+            phystd, l2std,
+            autodiff,
+            physdt,
+            extraparams,
+            init_params)
+    end
+end
+
+"""
+cool function to convert parameter's vector to ComponentArray of parameters (for Lux Chain: vector of samples -> Lux ComponentArrays)
+"""
+function vector_to_parameters(ps_new::AbstractVector, ps::NamedTuple)
+    @assert length(ps_new) == Lux.parameterlength(ps)
+    i = 1
+    function get_ps(x)
+        z = reshape(view(ps_new, i:(i + length(x) - 1)), size(x))
+        i += length(x)
+        return z
+    end
+    return Functors.fmap(get_ps, ps)
+end
+
+function LogDensityProblems.logdensity(Tar::LogTargetDensity, θ)
+    return physloglikelihood(Tar, θ) + priorweights(Tar, θ) + L2LossData(Tar, θ)
+    #  +  L2loss2(Tar, θ)
+end
+
+LogDensityProblems.dimension(Tar::LogTargetDensity) = Tar.dim
+
+function LogDensityProblems.capabilities(::LogTargetDensity)
+    LogDensityProblems.LogDensityOrder{1}()
+end
+
+# suggested extra loss function
+function L2loss2(Tar::LogTargetDensity, θ)
+    f = Tar.prob.f
+
+    # parameter estimation chosen or not
+    if Tar.extraparams > 0
+        dataset, deri_sol = Tar.dataset
+        # deri_sol = deri_sol'
+        autodiff = Tar.autodiff
+
+        # # Timepoints to enforce Physics
+        # dataset = Array(reduce(hcat, dataset)')
+        # t = dataset[end, :]
+        # û = dataset[1:(end - 1), :]
+
+        # ode_params = Tar.extraparams == 1 ?
+        #              θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
+        #              θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
+
+        # if length(û[:, 1]) == 1
+        #     physsol = [f(û[:, i][1],
+        #         ode_params,
+        #         t[i])
+        #                for i in 1:length(û[1, :])]
+        # else
+        #     physsol = [f(û[:, i],
+        #         ode_params,
+        #         t[i])
+        #                for i in 1:length(û[1, :])]
+        # end
+        # #form of NN output matrix output dim x n
+        # deri_physsol = reduce(hcat, physsol)
+
+        # > for perfect deriv(basically gradient matching in case of an ODEFunction)
+        # in case of PDE or general ODE we would want to reduce residue of f(du,u,p,t)
+        # if length(û[:, 1]) == 1
+        #     deri_sol = [f(û[:, i][1],
+        #         Tar.prob.p,
+        #         t[i])
+        #                 for i in 1:length(û[1, :])]
+        # else
+        #     deri_sol = [f(û[:, i],
+        #         Tar.prob.p,
+        #         t[i])
+        #                 for i in 1:length(û[1, :])]
+        # end
+        # deri_sol = reduce(hcat, deri_sol) 
+        # deri_sol = reduce(hcat, derivatives)
+
+        # Timepoints to enforce Physics 
+        t = dataset[end]
+        u1 = dataset[2]
+        û = dataset[1]
+        # Tar(t, θ[1:(length(θ) - Tar.extraparams)])'
+        #  
+
+        nnsol = NNodederi(Tar, t, θ[1:(length(θ) - Tar.extraparams)], autodiff)
+
+        ode_params = Tar.extraparams == 1 ?
+                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
+                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
+
+        if length(Tar.prob.u0) == 1
+            physsol = [f(û[i],
+                ode_params,
+                t[i])
+                       for i in 1:length(û[:, 1])]
+        else
+            physsol = [f([û[i], u1[i]],
+                ode_params,
+                t[i])
+                       for i in 1:length(û[:, 1])]
+        end
+        #form of NN output matrix output dim x n 
+        deri_physsol = reduce(hcat, physsol)
+
+        # if length(Tar.prob.u0) == 1
+        #     nnsol = [f(û[i],
+        #         Tar.prob.p,
+        #         t[i])
+        #              for i in 1:length(û[:, 1])]
+        # else
+        #     nnsol = [f([û[i], u1[i]],
+        #         Tar.prob.p,
+        #         t[i])
+        #              for i in 1:length(û[:, 1])]
+        # end
+        # form of NN output matrix output dim x n
+        # nnsol = reduce(hcat, nnsol)
+
+        # > Instead of dataset gradients trying NN derivatives with dataset collocation 
+        # # convert to matrix as nnsol  
+
+        physlogprob = 0
+        for i in 1:length(Tar.prob.u0)
+            # can add phystd[i] for u[i] 
+            physlogprob += logpdf(MvNormal(deri_physsol[i, :],
+                    LinearAlgebra.Diagonal(map(abs2,
+                        (Tar.l2std[i] * 4.0) .*
+                        ones(length(nnsol[i, :]))))),
+                nnsol[i, :])
+        end
+        return physlogprob
+    else
+        return 0
+    end
+end
+
+# PDE(DU,U,P,T)=0
+
+# Derivated via Central Diff
+# function calculate_derivatives2(dataset)
+#     x̂, time = dataset
+#     num_points = length(x̂)
+#     # Initialize an array to store the derivative values.
+#     derivatives = similar(x̂)
+
+#     for i in 2:(num_points - 1)
+#         # Calculate the first-order derivative using central differences.
+#         Δt_forward = time[i + 1] - time[i]
+#         Δt_backward = time[i] - time[i - 1]
+
+#         derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
+
+#         derivatives[i] = derivative
+#     end
+
+#     # Derivatives at the endpoints can be calculated using forward or backward differences.
+#     derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
+#     derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
+#     return derivatives
+# end
+
+function calderivatives(prob, dataset)
+    chainflux = Flux.Chain(Flux.Dense(1, 8, tanh), Flux.Dense(8, 8, tanh),
+        Flux.Dense(8, 2)) |> Flux.f64
+    # chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64
+    function loss(x, y)
+        # sum(Flux.mse.(prob.u0[1] .+ (prob.tspan[2] .- x)' .* chainflux(x)[1, :], y[1]) +
+        #     Flux.mse.(prob.u0[2] .+ (prob.tspan[2] .- x)' .* chainflux(x)[2, :], y[2]))
+        # sum(Flux.mse.(prob.u0[1] .+ (prob.tspan[2] .- x)' .* chainflux(x)[1, :], y[1]))
+        sum(Flux.mse.(chainflux(x), y))
+    end
+    optimizer = Flux.Optimise.ADAM(0.01)
+    epochs = 3000
+    for epoch in 1:epochs
+        Flux.train!(loss,
+            Flux.params(chainflux),
+            [(dataset[end]', dataset[1:(end - 1)])],
+            optimizer)
+    end
+
+    # A1 = (prob.u0' .+
+    #   (prob.tspan[2] .- (dataset[end]' .+ sqrt(eps(eltype(Float64)))))' .*
+    #   chainflux(dataset[end]' .+ sqrt(eps(eltype(Float64))))')
+
+    # A2 = (prob.u0' .+
+    #       (prob.tspan[2] .- (dataset[end]'))' .*
+    #       chainflux(dataset[end]')')
+
+    A1 = chainflux(dataset[end]' .+ sqrt(eps(eltype(dataset[end][1]))))
+    A2 = chainflux(dataset[end]')
+
+    gradients = (A2 .- A1) ./ sqrt(eps(eltype(dataset[end][1])))
+
+    return gradients
+end
+
+function calculate_derivatives(dataset)
+
+    # u = dataset[1]
+    # u1 = dataset[2]
+    # t = dataset[end]
+    # # control points
+    # n = Int(floor(length(t) / 10))
+    # # spline for datasetvalues(solution) 
+    # # interp = BSplineApprox(u, t, 4, 10, :Uniform, :Uniform)
+    # interp = CubicSpline(u, t)
+    # interp1 = CubicSpline(u1, t)
+    # # derrivatives interpolation
+    # dx = t[2] - t[1]
+    # time = collect(t[1]:dx:t[end])
+    # smoothu = [interp(i) for i in time]
+    # smoothu1 = [interp1(i) for i in time]
+    # # derivative of the spline (must match function derivative) 
+    # û = tvdiff(smoothu, 20, 0.5, dx = dx, ε = 1)
+    # û1 = tvdiff(smoothu1, 20, 0.5, dx = dx, ε = 1)
+    # # tvdiff(smoothu, 100, 0.035, dx = dx, ε = 1)
+    # # FDM
+    # # û1 = diff(u) / dx
+    # # dataset[1] and smoothu are almost equal(rounding errors)
+    # return [û, û1] 
+
+end
+
+"""
+L2 loss loglikelihood(needed for ODE parameter estimation)
+"""
+function L2LossData(Tar::LogTargetDensity, θ)
+    dataset = Tar.dataset
+    # check if dataset is provided
+    if dataset isa Vector{Nothing} || Tar.extraparams == 0
+        return 0
+    else
+        # matrix(each row corresponds to vector u's rows)
+        nn = Tar(dataset[end], θ[1:(length(θ) - Tar.extraparams)])
+
+        L2logprob = 0
+        for i in 1:length(Tar.prob.u0)
+            # for u[i] ith vector must be added to dataset,nn[1,:] is the dx in lotka_volterra
+            L2logprob += logpdf(MvNormal(nn[i, :],
+                    LinearAlgebra.Diagonal(map(abs2,
+                        (Tar.l2std[i] * 0.5) .*
+                        ones(length(dataset[i]))))),
+                dataset[i])
+        end
+        return L2logprob
+    end
+end
+
+"""
+physics loglikelihood over problem timespan + dataset timepoints
+"""
+function physloglikelihood(Tar::LogTargetDensity, θ)
+    f = Tar.prob.f
+    p = Tar.prob.p
+    tspan = Tar.prob.tspan
+    autodiff = Tar.autodiff
+    strategy = Tar.strategy
+
+    # parameter estimation chosen or not
+    if Tar.extraparams > 0
+        ode_params = Tar.extraparams == 1 ?
+                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
+                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
+    else
+        ode_params = p == SciMLBase.NullParameters() ? [] : p
+    end
+
+    return getlogpdf(strategy, Tar, f, autodiff, tspan, ode_params, θ)
+end
+
+function getlogpdf(strategy::GridTraining, Tar::LogTargetDensity, f, autodiff::Bool,
+    tspan,
+    ode_params, θ)
+    if Tar.dataset isa Vector{Nothing}
+        t = collect(eltype(strategy.dx), tspan[1]:(strategy.dx):tspan[2])
+    else
+        t = vcat(collect(eltype(strategy.dx), tspan[1]:(strategy.dx):tspan[2]),
+            Tar.dataset[end])
+    end
+
+    sum(innerdiff(Tar, f, autodiff, t, θ,
+        ode_params))
+end
+
+function getlogpdf(strategy::StochasticTraining,
+    Tar::LogTargetDensity,
+    f,
+    autodiff::Bool,
+    tspan,
+    ode_params,
+    θ)
+    if Tar.dataset isa Vector{Nothing}
+        t = [(tspan[2] - tspan[1]) * rand() + tspan[1] for i in 1:(strategy.points)]
+    else
+        t = vcat([(tspan[2] - tspan[1]) * rand() + tspan[1] for i in 1:(strategy.points)],
+            Tar.dataset[end])
+    end
+
+    sum(innerdiff(Tar, f, autodiff, t, θ,
+        ode_params))
+end
+
+function getlogpdf(strategy::QuadratureTraining, Tar::LogTargetDensity, f,
+    autodiff::Bool,
+    tspan,
+    ode_params, θ)
+    function integrand(t::Number, θ)
+        innerdiff(Tar, f, autodiff, [t], θ, ode_params)
+    end
+    intprob = IntegralProblem(integrand, tspan[1], tspan[2], θ; nout = length(Tar.prob.u0))
+    # add dataset logpdf?
+    sol = solve(intprob, QuadGKJL(); abstol = strategy.abstol, reltol = strategy.reltol)
+    sum(sol.u)
+end
+
+function getlogpdf(strategy::WeightedIntervalTraining, Tar::LogTargetDensity, f,
+    autodiff::Bool,
+    tspan,
+    ode_params, θ)
+    minT = tspan[1]
+    maxT = tspan[2]
+
+    weights = strategy.weights ./ sum(strategy.weights)
+
+    N = length(weights)
+    points = strategy.points
+
+    difference = (maxT - minT) / N
+
+    data = Float64[]
+    for (index, item) in enumerate(weights)
+        temp_data = rand(1, trunc(Int, points * item)) .* difference .+ minT .+
+                    ((index - 1) * difference)
+        data = append!(data, temp_data)
+    end
+
+    if Tar.dataset isa Vector{Nothing}
+        t = data
+    else
+        t = vcat(data,
+            Tar.dataset[end])
+    end
+
+    sum(innerdiff(Tar, f, autodiff, t, θ,
+        ode_params))
+end
+
+"""
+MvNormal likelihood at each `ti` in time `t` for ODE collocation residue with NN with parameters θ 
+"""
+function innerdiff(Tar::LogTargetDensity, f, autodiff::Bool, t::AbstractVector, θ,
+    ode_params)
+
+    # Tar used for phi and LogTargetDensity object attributes access
+    out = Tar(t, θ[1:(length(θ) - Tar.extraparams)])
+
+    # # reject samples case(write clear reason why)
+    if any(isinf, out[:, 1]) || any(isinf, ode_params)
+        return -Inf
+    end
+
+    # this is a vector{vector{dx,dy}}(handle case single u(float passed))
+    if length(out[:, 1]) == 1
+        physsol = [f(out[:, i][1],
+            ode_params,
+            t[i])
+                   for i in 1:length(out[1, :])]
+    else
+        physsol = [f(out[:, i],
+            ode_params,
+            t[i])
+                   for i in 1:length(out[1, :])]
+    end
+    physsol = reduce(hcat, physsol)
+
+    nnsol = NNodederi(Tar, t, θ[1:(length(θ) - Tar.extraparams)], autodiff)
+
+    vals = nnsol .- physsol
+
+    # N dimensional vector if N outputs for NN(each row has logpdf of i[i] where u is vector of dependant variables)
+    return [logpdf(MvNormal(vals[i, :],
+            LinearAlgebra.Diagonal(map(abs2,
+                Tar.phystd[i] .*
+                ones(length(vals[i, :]))))),
+        zeros(length(vals[i, :]))) for i in 1:length(Tar.prob.u0)]
+end
+
+"""
+prior logpdf for NN parameters + ODE constants
+"""
+function priorweights(Tar::LogTargetDensity, θ)
+    allparams = Tar.priors
+    # nn weights
+    nnwparams = allparams[1]
+
+    if Tar.extraparams > 0
+        # Vector of ode parameters priors
+        invpriors = allparams[2:end]
+
+        invlogpdf = sum(logpdf(invpriors[length(θ) - i + 1], θ[i])
+                        for i in (length(θ) - Tar.extraparams + 1):length(θ); init = 0.0)
+
+        return (invlogpdf
+                +
+                logpdf(nnwparams, θ[1:(length(θ) - Tar.extraparams)]))
+    else
+        return logpdf(nnwparams, θ)
+    end
+end
+
+function generate_Tar(chain::Lux.AbstractExplicitLayer, init_params)
+    θ, st = Lux.setup(Random.default_rng(), chain)
+    return init_params, chain, st
+end
+
+function generate_Tar(chain::Lux.AbstractExplicitLayer, init_params::Nothing)
+    θ, st = Lux.setup(Random.default_rng(), chain)
+    return θ, chain, st
+end
+
+function generate_Tar(chain::Flux.Chain, init_params)
+    θ, re = Flux.destructure(chain)
+    return init_params, re, nothing
+end
+
+function generate_Tar(chain::Flux.Chain, init_params::Nothing)
+    θ, re = Flux.destructure(chain)
+    # find_good_stepsize,phasepoint takes only float64
+    return θ, re, nothing
+end
+
+"""
+nn OUTPUT AT t,θ ~ phi(t,θ)
+"""
+function (f::LogTargetDensity{C, S})(t::AbstractVector,
+    θ) where {C <: Optimisers.Restructure, S}
+    f.prob.u0 .+ (t' .- f.prob.tspan[1]) .* f.chain(θ)(adapt(parameterless_type(θ), t'))
+end
+
+function (f::LogTargetDensity{C, S})(t::AbstractVector,
+    θ) where {C <: Lux.AbstractExplicitLayer, S}
+    θ = vector_to_parameters(θ, f.init_params)
+    y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), t'), θ, f.st)
+    ChainRulesCore.@ignore_derivatives f.st = st
+    f.prob.u0 .+ (t' .- f.prob.tspan[1]) .* y
+end
+
+function (f::LogTargetDensity{C, S})(t::Number,
+    θ) where {C <: Optimisers.Restructure, S}
+    #  must handle paired odes hence u0 broadcasted
+    f.prob.u0 .+ (t - f.prob.tspan[1]) * f.chain(θ)(adapt(parameterless_type(θ), [t]))
+end
+
+function (f::LogTargetDensity{C, S})(t::Number,
+    θ) where {C <: Lux.AbstractExplicitLayer, S}
+    θ = vector_to_parameters(θ, f.init_params)
+    y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), [t]), θ, f.st)
+    ChainRulesCore.@ignore_derivatives f.st = st
+    f.prob.u0 .+ (t .- f.prob.tspan[1]) .* y
+end
+
+"""
+similar to ode_dfdx() in NNODE/ode_solve.jl
+"""
+function NNodederi(phi::LogTargetDensity, t::AbstractVector, θ, autodiff::Bool)
+    if autodiff
+        hcat(ForwardDiff.derivative.(ti -> phi(ti, θ), t)...)
+    else
+        (phi(t .+ sqrt(eps(eltype(t))), θ) - phi(t, θ)) ./ sqrt(eps(eltype(t)))
+    end
+end
+
+function kernelchoice(Kernel, max_depth, Δ_max, n_leapfrog, δ, λ)
+    if Kernel == HMC
+        Kernel(n_leapfrog)
+    elseif Kernel == HMCDA
+        Kernel(δ, λ)
+    else
+        Kernel(δ, max_depth = max_depth, Δ_max = Δ_max)
+    end
+end
+
+function integratorchoice(Integrator, initial_ϵ, jitter_rate,
+    tempering_rate)
+    if Integrator == JitteredLeapfrog
+        Integrator(initial_ϵ, jitter_rate)
+    elseif Integrator == TemperedLeapfrog
+        Integrator(initial_ϵ, tempering_rate)
+    else
+        Integrator(initial_ϵ)
+    end
+end
+
+function adaptorchoice(Adaptor, mma, ssa)
+    if Adaptor != AdvancedHMC.NoAdaptation()
+        Adaptor(mma, ssa)
+    else
+        AdvancedHMC.NoAdaptation()
+    end
+end
+
+"""
+```julia
+ahmc_bayesian_pinn_ode(prob, chain; strategy = GridTraining,
+                    dataset = [nothing],init_params = nothing, 
+                    draw_samples = 1000, physdt = 1 / 20.0f0,l2std = [0.05],
+                    phystd = [0.05], priorsNNw = (0.0, 2.0),
+                    param = [],nchains = 1,autodiff = false, Kernel = HMC,
+                    Integrator = Leapfrog, Adaptor = StanHMCAdaptor,
+                    targetacceptancerate = 0.8, Metric = DiagEuclideanMetric,
+                    jitter_rate = 3.0, tempering_rate = 3.0, max_depth = 10,
+                    Δ_max = 1000, n_leapfrog = 10, δ = 0.65, λ = 0.3,
+                    progress = false,verbose = false)
+```
+!!! warn
+
+    Note that ahmc_bayesian_pinn_ode() only supports ODEs which are written in the out-of-place form, i.e.
+    `du = f(u,p,t)`, and not `f(du,u,p,t)`. If not declared out-of-place, then the ahmc_bayesian_pinn_ode()
+    will exit with an error.
+
+## Example
+linear = (u, p, t) -> -u / p[1] + exp(t / p[2]) * cos(t)
+tspan = (0.0, 10.0)
+u0 = 0.0
+p = [5.0, -5.0]
+prob = ODEProblem(linear, u0, tspan, p)
+
+# CREATE DATASET (Necessity for accurate Parameter estimation)
+sol = solve(prob, Tsit5(); saveat = 0.05)
+u = sol.u[1:100]
+time = sol.t[1:100]
+
+# dataset and BPINN create
+x̂ = collect(Float64, Array(u) + 0.05 * randn(size(u)))
+dataset = [x̂, time]
+
+chainflux1 = Flux.Chain(Flux.Dense(1, 5, tanh), Flux.Dense(5, 5, tanh), Flux.Dense(5, 1)
+
+# simply solving ode here hence better to not pass dataset(uses ode params specified in prob)
+fh_mcmc_chainflux1, fhsamplesflux1, fhstatsflux1 = ahmc_bayesian_pinn_ode(prob,chainflux1,
+                                                                          dataset = dataset,
+                                                                          draw_samples = 1500,
+                                                                          l2std = [0.05],
+                                                                          phystd = [0.05],
+                                                                          priorsNNw = (0.0,3.0))
+
+# solving ode + estimating parameters hence dataset needed to optimize parameters upon + Pior Distributions for ODE params
+fh_mcmc_chainflux2, fhsamplesflux2, fhstatsflux2 = ahmc_bayesian_pinn_ode(prob,chainflux1,
+                                                                          dataset = dataset,
+                                                                          draw_samples = 1500,
+                                                                          l2std = [0.05],
+                                                                          phystd = [0.05],
+                                                                          priorsNNw = (0.0,3.0),
+                                                                          param = [Normal(6.5,0.5),Normal(-3,0.5)])
+
+## NOTES 
+Dataset is required for accurate Parameter estimation + solving equations
+Incase you are only solving the Equations for solution, do not provide dataset
+
+## Positional Arguments
+* `prob`: DEProblem(out of place and the function signature should be f(u,p,t)
+* `chain`: Lux/Flux Neural Netork which would be made the Bayesian PINN
+
+## Keyword Arguments
+* `strategy`: The training strategy used to choose the points for the evaluations. By default GridTraining is used with given physdt discretization.
+* `dataset`: Vector containing Vectors of corresponding u,t values 
+* `init_params`: intial parameter values for BPINN (ideally for multiple chains different initializations preferred)
+* `nchains`: number of chains you want to sample (random initialisation of params by default)
+* `draw_samples`: number of samples to be drawn in the MCMC algorithms (warmup samples are ~2/3 of draw samples)
+* `l2std`: standard deviation of BPINN predicition against L2 losses/Dataset
+* `phystd`: standard deviation of BPINN predicition against Chosen Underlying ODE System
+* `priorsNNw`: Vector of [mean, std] for BPINN parameter. Weights and Biases of BPINN are Normal Distributions by default
+* `param`: Vector of chosen ODE parameters Distributions in case of Inverse problems.
+* `autodiff`: Boolean Value for choice of Derivative Backend(default is numerical)
+* `physdt`: Timestep for approximating ODE in it's Time domain. (1/20.0 by default)
+
+# AHMC.jl is still developing convenience structs so might need changes on new releases.
+* `Kernel`: Choice of MCMC Sampling Algorithm (AdvancedHMC.jl implemenations HMC/NUTS/HMCDA)
+* `targetacceptancerate`: Target percentage(in decimal) of iterations in which the proposals were accepted(0.8 by default)
+* `Integrator(jitter_rate, tempering_rate), Metric, Adaptor`: https://turinglang.org/AdvancedHMC.jl/stable/
+* `max_depth`: Maximum doubling tree depth (NUTS)
+* `Δ_max`: Maximum divergence during doubling tree (NUTS)
+* `n_leapfrog`: number of leapfrog steps for HMC
+* `δ`: target acceptance probability for NUTS/HMCDA
+* `λ`: target trajectory length for HMCDA
+* `progress`: controls whether to show the progress meter or not.
+* `verbose`: controls the verbosity. (Sample call args in AHMC)
+
+"""
+
+"""
+dataset would be (x̂,t)
+priors: pdf for W,b + pdf for ODE params
+"""
+function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain;
+    strategy = GridTraining, dataset = [nothing],
+    init_params = nothing, draw_samples = 1000,
+    physdt = 1 / 20.0, l2std = [0.05],
+    phystd = [0.05], priorsNNw = (0.0, 2.0),
+    param = [], nchains = 1, autodiff = false,
+    Kernel = HMC, Integrator = Leapfrog,
+    Adaptor = StanHMCAdaptor, targetacceptancerate = 0.8,
+    Metric = DiagEuclideanMetric, jitter_rate = 3.0,
+    tempering_rate = 3.0, max_depth = 10, Δ_max = 1000,
+    n_leapfrog = 10, δ = 0.65, λ = 0.3, progress = false,
+    verbose = false)
+
+    # NN parameter prior mean and variance(PriorsNN must be a tuple)
+    if isinplace(prob)
+        throw(error("The BPINN ODE solver only supports out-of-place ODE definitions, i.e. du=f(u,p,t)."))
+    end
+
+    strategy = strategy == GridTraining ? strategy(physdt) : strategy
+
+    if dataset != [nothing] &&
+       (length(dataset) < 2 || !(typeof(dataset) <: Vector{<:Vector{<:AbstractFloat}}))
+        throw(error("Invalid dataset. dataset would be timeseries (x̂,t) where type: Vector{Vector{AbstractFloat}"))
+    end
+
+    if dataset != [nothing] && param == []
+        println("Dataset is only needed for Parameter Estimation + Forward Problem, not in only Forward Problem case.")
+    elseif dataset == [nothing] && param != []
+        throw(error("Dataset Required for Parameter Estimation."))
+    end
+
+    if chain isa Lux.AbstractExplicitLayer || chain isa Flux.Chain
+        # Flux-vector, Lux-Named Tuple
+        initial_nnθ, recon, st = generate_Tar(chain, init_params)
+    else
+        error("Only Lux.AbstractExplicitLayer and Flux.Chain neural networks are supported")
+    end
+
+    if nchains > Threads.nthreads()
+        throw(error("number of chains is greater than available threads"))
+    elseif nchains < 1
+        throw(error("number of chains must be greater than 1"))
+    end
+
+    # eltype(physdt) cause needs Float64 for find_good_stepsize
+    if chain isa Lux.AbstractExplicitLayer
+        # Lux chain(using component array later as vector_to_parameter need namedtuple)
+        initial_θ = collect(eltype(physdt),
+            vcat(ComponentArrays.ComponentArray(initial_nnθ)))
+    else
+        initial_θ = collect(eltype(physdt), initial_nnθ)
+    end
+
+    # adding ode parameter estimation
+    nparameters = length(initial_θ)
+    ninv = length(param)
+    priors = [
+        MvNormal(priorsNNw[1] * ones(nparameters),
+            LinearAlgebra.Diagonal(map(abs2, priorsNNw[2] .* ones(nparameters)))),
+    ]
+
+    # append Ode params to all paramvector
+    if ninv > 0
+        # shift ode params(initialise ode params by prior means)
+        initial_θ = vcat(initial_θ, [Distributions.params(param[i])[1] for i in 1:ninv])
+        priors = vcat(priors, param)
+        nparameters += ninv
+    end
+
+    t0 = prob.tspan[1]
+    # dimensions would be total no of params,initial_nnθ for Lux namedTuples
+    ℓπ = LogTargetDensity(nparameters, prob, recon, st, strategy, dataset, priors,
+        phystd, l2std, autodiff, physdt, ninv, initial_nnθ)
+
+    try
+        ℓπ(t0, initial_θ[1:(nparameters - ninv)])
+    catch err
+        if isa(err, DimensionMismatch)
+            throw(DimensionMismatch("Dimensions of the initial u0 and chain should match"))
+        else
+            throw(err)
+        end
+    end
+
+    # Define Hamiltonian system (nparameters ~ dimensionality of the sampling space)
+    metric = Metric(nparameters)
+    hamiltonian = Hamiltonian(metric, ℓπ, ForwardDiff)
+
+    println("physics Logpdf is : ", physloglikelihood(ℓπ, initial_θ))
+    println("prior Logpdf is : ", priorweights(ℓπ, initial_θ))
+    println("L2lossData Logpdf is : ", L2LossData(ℓπ, initial_θ))
+    println("L2loss2 Logpdf is : ", L2loss2(ℓπ, initial_θ))
+
+    # parallel sampling option
+    if nchains != 1
+        # Cache to store the chains
+        chains = Vector{Any}(undef, nchains)
+        statsc = Vector{Any}(undef, nchains)
+        samplesc = Vector{Any}(undef, nchains)
+
+        Threads.@threads for i in 1:nchains
+            # each chain has different initial NNparameter values(better posterior exploration)
+            initial_θ = vcat(randn(nparameters - ninv),
+                initial_θ[(nparameters - ninv + 1):end])
+            initial_ϵ = find_good_stepsize(hamiltonian, initial_θ)
+            integrator = integratorchoice(Integrator, initial_ϵ, jitter_rate,
+                tempering_rate)
+            adaptor = adaptorchoice(Adaptor, MassMatrixAdaptor(metric),
+                StepSizeAdaptor(targetacceptancerate, integrator))
+            Kernel = AdvancedHMC.make_kernel(kernelchoice(Kernel, max_depth, Δ_max,
+                    n_leapfrog, δ, λ), integrator)
+            samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples, adaptor;
+                progress = progress, verbose = verbose)
+
+            samplesc[i] = samples
+            statsc[i] = stats
+            mcmc_chain = Chains(hcat(samples...)')
+            chains[i] = mcmc_chain
+        end
+
+        return chains, samplesc, statsc
+    else
+        initial_ϵ = find_good_stepsize(hamiltonian, initial_θ)
+        integrator = integratorchoice(Integrator, initial_ϵ, jitter_rate, tempering_rate)
+        adaptor = adaptorchoice(Adaptor, MassMatrixAdaptor(metric),
+            StepSizeAdaptor(targetacceptancerate, integrator))
+        Kernel = AdvancedHMC.make_kernel(kernelchoice(Kernel, max_depth, Δ_max, n_leapfrog,
+                δ, λ), integrator)
+        samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples,
+            adaptor; progress = progress, verbose = verbose)
+
+        # return a chain(basic chain),samples and stats
+        matrix_samples = hcat(samples...)
+        mcmc_chain = MCMCChains.Chains(matrix_samples')
+        return mcmc_chain, samples, stats
+    end
+end
\ No newline at end of file
diff --git a/src/BPINN_ode.jl b/src/BPINN_ode.jl
index 9e137fb5d8..f65f1d659e 100644
--- a/src/BPINN_ode.jl
+++ b/src/BPINN_ode.jl
@@ -214,7 +214,6 @@ function SciMLBase.__solve(prob::SciMLBase.ODEProblem, alg::BNNODE, args...; dt
         push!(ensemblecurves, ensemblecurve)
     end
 
-    # estimated using all samples
     nnparams = length(θinit)
     estimnnparams = [Particles(reduce(hcat, samples[(end - numensemble):end])[i, :])
                      for i in 1:nnparams]
diff --git a/src/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl
index 1f0189776f..0112cbaaa9 100644
--- a/src/advancedHMC_MCMC.jl
+++ b/src/advancedHMC_MCMC.jl
@@ -745,11 +745,6 @@ function ahmc_bayesian_pinn_ode(
     metric = Metric(nparameters)
     hamiltonian = Hamiltonian(metric, ℓπ, ForwardDiff)
 
-    println("physics Logpdf is : ", physloglikelihood(ℓπ, initial_θ))
-    println("prior Logpdf is : ", priorweights(ℓπ, initial_θ))
-    println("L2lossData Logpdf is : ", L2LossData(ℓπ, initial_θ))
-    println("L2loss2 Logpdf is : ", L2loss2(ℓπ, initial_θ))
-
     # parallel sampling option
     if nchains != 1
         # Cache to store the chains
diff --git a/test/BPINN_newform.jl b/test/BPINN_newform.jl
new file mode 100644
index 0000000000..fa2f04073e
--- /dev/null
+++ b/test/BPINN_newform.jl
@@ -0,0 +1,4354 @@
+# # Testing Code
+using Test, MCMCChains
+using ForwardDiff, Distributions, OrdinaryDiffEq
+using Flux, OptimizationOptimisers, AdvancedHMC, Lux
+using Statistics, Random, Functors, ComponentArrays
+using NeuralPDE, MonteCarloMeasurements
+
+# note that current testing bounds can be easily further tightened but have been inflated for support for Julia build v1
+# on latest Julia version it performs much better for below tests
+Random.seed!(100)
+
+# for sampled params->lux ComponentArray
+function vector_to_parameters(ps_new::AbstractVector, ps::NamedTuple)
+    @assert length(ps_new) == Lux.parameterlength(ps)
+    i = 1
+    function get_ps(x)
+        z = reshape(view(ps_new, i:(i + length(x) - 1)), size(x))
+        i += length(x)
+        return z
+    end
+    return Functors.fmap(get_ps, ps)
+end
+
+## PROBLEM-1 (WITHOUT PARAMETER ESTIMATION)
+linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π)
+linear = (u, p, t) -> cos(2 * π * t)
+tspan = (0.0, 2.0)
+u0 = 0.0
+prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan)
+p = prob.p
+
+# Numerical and Analytical Solutions: testing ahmc_bayesian_pinn_ode()
+ta = range(tspan[1], tspan[2], length = 300)
+u = [linear_analytic(u0, nothing, ti) for ti in ta]
+x̂ = collect(Float64, Array(u) + 0.02 * randn(size(u)))
+time = vec(collect(Float64, ta))
+physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# testing points for solve() call must match saveat(1/50.0) arg
+ta0 = range(tspan[1], tspan[2], length = 101)
+u1 = [linear_analytic(u0, nothing, ti) for ti in ta0]
+x̂1 = collect(Float64, Array(u1) + 0.02 * randn(size(u1)))
+time1 = vec(collect(Float64, ta0))
+physsol0_1 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
+
+chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64
+chainlux = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1))
+init1, re1 = destructure(chainflux)
+θinit, st = Lux.setup(Random.default_rng(), chainlux)
+
+fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chainflux,
+    draw_samples = 2500,
+    n_leapfrog = 30)
+
+fh_mcmc_chain2, fhsamples2, fhstats2 = ahmc_bayesian_pinn_ode(prob, chainlux,
+    draw_samples = 2500,
+    n_leapfrog = 30)
+
+# can change training strategies by adding this to call (Quadratuer and GridTraining show good results but stochastics sampling techniques perform bad)
+# strategy = QuadratureTraining(; quadrature_alg = QuadGKJL(),
+#     reltol = 1e-6,
+#     abstol = 1e-3, maxiters = 1000,
+#     batch = 0)
+
+alg = NeuralPDE.BNNODE(chainflux, draw_samples = 2500,
+    n_leapfrog = 30)
+sol1flux = solve(prob, alg)
+
+alg = NeuralPDE.BNNODE(chainlux, draw_samples = 2500,
+    n_leapfrog = 30)
+sol1lux = solve(prob, alg)
+
+# testing points
+t = time
+# Mean of last 500 sampled parameter's curves(flux and lux chains)[Ensemble predictions]
+out = re1.(fhsamples1[(end - 500):end])
+yu = collect(out[i](t') for i in eachindex(out))
+fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)]
+meanscurve1 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean
+
+θ = [vector_to_parameters(fhsamples1[i], θinit) for i in 2000:2500]
+luxar = [chainlux(t', θ[i], st)[1] for i in 1:500]
+luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
+meanscurve2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
+
+# --------------------- ahmc_bayesian_pinn_ode() call
+@test mean(abs.(x̂ .- meanscurve1)) < 0.05
+@test mean(abs.(physsol1 .- meanscurve1)) < 0.005
+@test mean(abs.(x̂ .- meanscurve2)) < 0.05
+@test mean(abs.(physsol1 .- meanscurve2)) < 0.005
+
+#--------------------- solve() call 
+@test mean(abs.(x̂1 .- sol1flux.ensemblesol[1])) < 0.05
+@test mean(abs.(physsol0_1 .- sol1flux.ensemblesol[1])) < 0.05
+@test mean(abs.(x̂1 .- sol1lux.ensemblesol[1])) < 0.05
+@test mean(abs.(physsol0_1 .- sol1lux.ensemblesol[1])) < 0.05
+
+## PROBLEM-1 (WITH PARAMETER ESTIMATION)
+linear_analytic = (u0, p, t) -> u0 + sin(p * t) / (p)
+linear = (u, p, t) -> cos(p * t)
+tspan = (0.0, 2.0)
+u0 = 0.0
+p = 2 * pi
+prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan, p)
+
+# Numerical and Analytical Solutions
+sol1 = solve(prob, Tsit5(); saveat = 0.01)
+u = sol1.u
+time = sol1.t
+
+# BPINN AND TRAINING DATASET CREATION(dataset must be defined only inside problem timespan!)
+ta = range(tspan[1], tspan[2], length = 25)
+u = [linear_analytic(u0, p, ti) for ti in ta]
+x̂ = collect(Float64, Array(u) .+ (0.2 .* Array(u) .* randn(size(u))))
+time = vec(collect(Float64, ta))
+dataset = [x̂, time]
+physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# testing points for solve call(saveat=1/50.0 ∴ at t = collect(eltype(saveat), prob.tspan[1]:saveat:prob.tspan[2] internally estimates)
+ta0 = range(tspan[1], tspan[2], length = 101)
+u1 = [linear_analytic(u0, p, ti) for ti in ta0]
+x̂1 = collect(Float64, Array(u1) + 0.2 * randn(size(u1)))
+time1 = vec(collect(Float64, ta0))
+physsol1_1 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
+
+using Plots, StatsPlots
+# plot(dataset[2], calderivatives(dataset)')
+yu = collect(prob.tspan[1]:(1 / 50.0):prob.tspan[2])
+plot(yu, [linear_analytic(u0, p, t) for t in yu])
+chainflux1 = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64
+chainlux1 = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1))
+init1, re1 = destructure(chainflux1)
+θinit, st = Lux.setup(Random.default_rng(), chainlux1)
+
+fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chainflux1,
+    dataset = dataset,
+    draw_samples = 2500,
+    physdt = 1 / 50.0f0,
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(9,
+            0.5),
+    ],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30)
+
+fh_mcmc_chain2, fhsamples2, fhstats2 = ahmc_bayesian_pinn_ode(prob, chainlux1,
+    dataset = dataset,
+    draw_samples = 2500,
+    physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30)
+
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 10.0),
+    l2std = [0.005], phystd = [0.01],
+    param = [Normal(11, 6)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30)
+# original paper (pure data 0 1)
+sol1flux = solve(prob, alg)
+sol1flux.estimated_ode_params
+# pure data method 1 1
+sol2flux = solve(prob, alg)
+sol2flux.estimated_ode_params
+# pure data method 1 0
+sol3flux = solve(prob, alg)
+sol3flux.estimated_ode_params
+# deri collocation
+sol4flux = solve(prob, alg)
+sol4flux.estimated_ode_params
+# collocation
+sol5flux = solve(prob, alg)
+sol5flux.estimated_ode_params
+# collocation + L2Data loss(at 9,0.5 1,2 gives same)
+sol6flux = solve(prob, alg)
+sol6flux.estimated_ode_params
+# 2500 iters
+sol7flux = solve(prob, alg)
+sol7flux.estimated_ode_params
+
+plotly()
+plot!(yu, sol1flux.ensemblesol[1])
+plot!(yu, sol2flux.ensemblesol[1])
+plot!(yu, sol3flux.ensemblesol[1])
+plot!(yu, sol4flux.ensemblesol[1])
+plot!(yu, sol5flux.ensemblesol[1])
+plot!(yu, sol6flux.ensemblesol[1])
+
+plot!(dataset[2], dataset[1])
+
+# plot!(sol4flux.ensemblesol[1])
+# plot!(sol5flux.ensemblesol[1])
+
+sol2flux.estimated_ode_params
+
+sol1flux.estimated_ode_params
+
+sol3flux.estimated_ode_params
+
+sol4flux.estimated_ode_params
+
+sol5flux.estimated_ode_params
+
+alg = NeuralPDE.BNNODE(chainlux1, dataset = dataset,
+    draw_samples = 2500,
+    physdt = 1 / 50.0f0,
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(9,
+            0.5),
+    ],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30)
+
+sol2lux = solve(prob, alg)
+
+# testing points
+t = time
+# Mean of last 500 sampled parameter's curves(flux and lux chains)[Ensemble predictions]
+out = re1.([fhsamples1[i][1:22] for i in 2000:2500])
+yu = collect(out[i](t') for i in eachindex(out))
+fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)]
+meanscurve1 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean
+
+θ = [vector_to_parameters(fhsamples2[i][1:(end - 1)], θinit) for i in 2000:2500]
+luxar = [chainlux1(t', θ[i], st)[1] for i in 1:500]
+luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
+meanscurve2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
+
+# --------------------- ahmc_bayesian_pinn_ode() call  
+@test mean(abs.(physsol1 .- meanscurve1)) < 0.15
+@test mean(abs.(physsol1 .- meanscurve2)) < 0.15
+
+# ESTIMATED ODE PARAMETERS (NN1 AND NN2)
+@test abs(p - mean([fhsamples2[i][23] for i in 2000:2500])) < abs(0.25 * p)
+@test abs(p - mean([fhsamples1[i][23] for i in 2000:2500])) < abs(0.25 * p)
+
+#-------------------------- solve() call  
+@test mean(abs.(physsol1_1 .- sol2flux.ensemblesol[1])) < 8e-2
+@test mean(abs.(physsol1_1 .- sol2lux.ensemblesol[1])) < 8e-2
+
+# ESTIMATED ODE PARAMETERS (NN1 AND NN2)
+@test abs(p - sol1flux.estimated_ode_params[1]) < abs(0.15 * p)
+@test abs(p - sol2lux.estimated_ode_params[1]) < abs(0.15 * p)
+
+## PROBLEM-2
+linear = (u, p, t) -> u / p + exp(t / p) * cos(t)
+tspan = (0.0, 10.0)
+u0 = 0.0
+p = -5.0
+prob = ODEProblem(linear, u0, tspan, p)
+linear_analytic = (u0, p, t) -> exp(t / p) * (u0 + sin(t))
+
+# SOLUTION AND CREATE DATASET
+sol = solve(prob, Tsit5(); saveat = 0.1)
+u = sol.u
+time = sol.t
+x̂ = u .+ (u .* 0.2) .* randn(size(u))
+dataset = [x̂, time]
+t = sol.t
+physsol1 = [linear_analytic(prob.u0, p, t[i]) for i in eachindex(t)]
+
+ta0 = range(tspan[1], tspan[2], length = 501)
+u1 = [linear_analytic(u0, p, ti) for ti in ta0]
+time1 = vec(collect(Float64, ta0))
+physsol2 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
+
+chainflux12 = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 6, tanh),
+    Flux.Dense(6, 1)) |> Flux.f64
+chainlux12 = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh), Lux.Dense(6, 1))
+init1, re1 = destructure(chainflux12)
+θinit, st = Lux.setup(Random.default_rng(), chainlux12)
+
+using Flux
+using Random
+
+function derivatives(chainflux, dataset)
+    loss(x, y) = Flux.mse(chainflux(x), y)
+    optimizer = Flux.Optimise.ADAM(0.01)
+    epochs = 2500
+    for epoch in 1:epochs
+        Flux.train!(loss, Flux.params(chainflux), [(dataset[2]', dataset[1]')], optimizer)
+    end
+    getgradient(chainflux, dataset)
+end
+
+function getgradient(chainflux, dataset)
+    return (chainflux(dataset[end]' .+ sqrt(eps(eltype(Float64)))) .-
+            chainflux(dataset[end]')) ./
+           sqrt(eps(eltype(dataset[end][1])))
+end
+
+ans = derivatives(chainflux12, dataset)
+
+init3, re = destructure(chainflux12)
+init2 == init1
+init3 == init2
+plot!(dataset[end], ans')
+plot!(dataset[end], chainflux12(dataset[end]')')
+
+ars = getgradient(chainflux12, dataset)
+
+plot!(dataset[end], ars')
+
+fh_mcmc_chainflux12, fhsamplesflux12, fhstatsflux12 = ahmc_bayesian_pinn_ode(prob,
+    chainflux12,
+    draw_samples = 1500,
+    l2std = [0.03],
+    phystd = [
+        0.03],
+    priorsNNw = (0.0,
+        10.0),
+    n_leapfrog = 30)
+
+fh_mcmc_chainflux22, fhsamplesflux22, fhstatsflux22 = ahmc_bayesian_pinn_ode(prob,
+    chainflux12,
+    dataset = dataset,
+    draw_samples = 1500,
+    l2std = [0.03],
+    phystd = [
+        0.03,
+    ],
+    priorsNNw = (0.0,
+        10.0),
+    param = [
+        Normal(-7,
+            4),
+    ],
+    n_leapfrog = 30)
+
+fh_mcmc_chainlux12, fhsampleslux12, fhstatslux12 = ahmc_bayesian_pinn_ode(prob, chainlux12,
+    draw_samples = 1500,
+    l2std = [0.03],
+    phystd = [0.03],
+    priorsNNw = (0.0,
+        10.0),
+    n_leapfrog = 30)
+
+fh_mcmc_chainlux22, fhsampleslux22, fhstatslux22 = ahmc_bayesian_pinn_ode(prob, chainlux12,
+    dataset = dataset,
+    draw_samples = 1500,
+    l2std = [0.03],
+    phystd = [0.03],
+    priorsNNw = (0.0,
+        10.0),
+    param = [
+        Normal(-7,
+            4),
+    ],
+    n_leapfrog = 30)
+
+alg1 = NeuralPDE.BNNODE(chainflux12,
+    dataset = dataset,
+    draw_samples = 500,
+    l2std = [0.01],
+    phystd = [
+        0.03,
+    ],
+    priorsNNw = (0.0,
+        10.0),
+    param = [
+        Normal(-7,
+            4),
+    ],
+    n_leapfrog = 30, progress = true)
+
+# original paper (pure data 0 1)
+sol1flux_pestim = solve(prob, alg1)
+sol1flux_pestim.estimated_ode_params
+# pure data method 1 1
+sol2flux_pestim = solve(prob, alg1)
+sol2flux_pestim.estimated_ode_params
+# pure data method 1 0
+sol3flux_pestim = solve(prob, alg1)
+sol3flux_pestim.estimated_ode_params
+# deri collocation
+sol4flux_pestim = solve(prob, alg1)
+sol4flux_pestim.estimated_ode_params
+# collocation
+sol5flux_pestim = solve(prob, alg1)
+sol5flux_pestim.estimated_ode_params
+# collocation + L2Data loss(at 9,0.5 1,2 gives same)
+sol6flux_pestim = solve(prob, alg1)
+sol6flux_pestim.estimated_ode_params
+
+using Plots, StatsPlots
+ars = collect(prob.tspan[1]:(1 / 50.0):prob.tspan[2])
+plot(time, u)
+plot!(ars, sol1flux_pestim.ensemblesol[1])
+plot!(ars, sol2flux_pestim.ensemblesol[1])
+plot!(ars, sol3flux_pestim.ensemblesol[1])
+plot!(ars, sol4flux_pestim.ensemblesol[1])
+plot!(ars, sol5flux_pestim.ensemblesol[1])
+plot!(ars, sol6flux_pestim.ensemblesol[1])
+
+sol3flux_pestim.estimated_ode_params
+
+sol4flux_pestim.estimated_ode_params
+
+sol5flux_pestim.estimated_ode_params
+
+sol6flux_pestim.estimated_ode_params
+
+ars = collect(prob.tspan[1]:(1 / 50.0):prob.tspan[2])
+
+init, re1 = destructure(chainflux12)
+init
+init1
+alg = NeuralPDE.BNNODE(chainlux12,
+    dataset = dataset,
+    draw_samples = 1500,
+    l2std = [0.03],
+    phystd = [0.03],
+    priorsNNw = (0.0,
+        10.0),
+    param = [
+        Normal(-7,
+            4),
+    ],
+    n_leapfrog = 30)
+
+sol3lux_pestim = solve(prob, alg)
+
+# testing timepoints
+t = sol.t
+#------------------------------ ahmc_bayesian_pinn_ode() call 
+# Mean of last 500 sampled parameter's curves(flux chains)[Ensemble predictions]
+out = re1.([fhsamplesflux12[i][1:61] for i in 1000:1500])
+yu = [out[i](t') for i in eachindex(out)]
+fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)]
+meanscurve1_1 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean
+
+out = re1.([fhsamplesflux22[i][1:61] for i in 1000:1500])
+yu = [out[i](t') for i in eachindex(out)]
+fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)]
+meanscurve1_2 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean
+
+@test mean(abs.(sol.u .- meanscurve1_1)) < 1e-2
+@test mean(abs.(physsol1 .- meanscurve1_1)) < 1e-2
+@test mean(abs.(sol.u .- meanscurve1_2)) < 5e-2
+@test mean(abs.(physsol1 .- meanscurve1_2)) < 5e-2
+
+# estimated parameters(flux chain)
+param1 = mean(i[62] for i in fhsamplesflux22[1000:1500])
+@test abs(param1 - p) < abs(0.3 * p)
+
+# Mean of last 500 sampled parameter's curves(lux chains)[Ensemble predictions]
+θ = [vector_to_parameters(fhsampleslux12[i], θinit) for i in 1000:1500]
+luxar = [chainlux12(t', θ[i], st)[1] for i in 1:500]
+luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
+meanscurve2_1 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
+
+θ = [vector_to_parameters(fhsampleslux22[i][1:(end - 1)], θinit) for i in 1000:1500]
+luxar = [chainlux12(t', θ[i], st)[1] for i in 1:500]
+luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
+meanscurve2_2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
+
+@test mean(abs.(sol.u .- meanscurve2_1)) < 1e-1
+@test mean(abs.(physsol1 .- meanscurve2_1)) < 1e-1
+@test mean(abs.(sol.u .- meanscurve2_2)) < 5e-2
+@test mean(abs.(physsol1 .- meanscurve2_2)) < 5e-2
+
+# estimated parameters(lux chain)
+param1 = mean(i[62] for i in fhsampleslux22[1000:1500])
+@test abs(param1 - p) < abs(0.3 * p)
+
+#-------------------------- solve() call 
+# (flux chain)
+@test mean(abs.(physsol2 .- sol3flux_pestim.ensemblesol[1])) < 0.15
+# estimated parameters(flux chain)
+param1 = sol3flux_pestim.estimated_ode_params[1]
+@test abs(param1 - p) < abs(0.45 * p)
+
+# (lux chain)
+@test mean(abs.(physsol2 .- sol3lux_pestim.ensemblesol[1])) < 0.15
+# estimated parameters(lux chain)
+param1 = sol3lux_pestim.estimated_ode_params[1]
+@test abs(param1 - p) < abs(0.45 * p)
+
+using Plots, StatsPlots
+using NoiseRobustDifferentiation, Weave, DataInterpolations
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood
+# # 25 points 
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+#     draw_samples = 1500, physdt = 1 / 50.0f0, phystd = [0.01],
+#     l2std = [0.01],
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux1 = solve(prob, alg)
+# sol2flux1.estimated_ode_params[1] #6.41722 Particles{Float64, 1}, 6.02404 Particles{Float64, 1}
+# sol2flux2 = solve(prob, alg)
+# sol2flux2.estimated_ode_params[1] #6.42782 Particles{Float64, 1}, 6.07509 Particles{Float64, 1}
+# sol2flux3 = solve(prob, alg)
+# sol2flux3.estimated_ode_params[1] #6.42782 Particles{Float64, 1}, 6.00825 Particles{Float64, 1}
+
+# # 50 points 
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux11 = solve(prob, alg)
+# sol2flux11.estimated_ode_params[1] #5.71268 Particles{Float64, 1}, 6.07242 Particles{Float64, 1}
+# sol2flux22 = solve(prob, alg)
+# sol2flux22.estimated_ode_params[1] #5.74599 Particles{Float64, 1}, 6.04837 Particles{Float64, 1}
+# sol2flux33 = solve(prob, alg)
+# sol2flux33.estimated_ode_params[1] #5.74599 Particles{Float64, 1}, 6.02838 Particles{Float64, 1}
+
+# # 100 points 
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux111 = solve(prob, alg)
+# sol2flux111.estimated_ode_params[1] #6.59097 Particles{Float64, 1}, 5.89384 Particles{Float64, 1}
+# sol2flux222 = solve(prob, alg)
+# sol2flux222.estimated_ode_params[1] #6.62813 Particles{Float64, 1}, 5.88216 Particles{Float64, 1}
+# sol2flux333 = solve(prob, alg)
+# sol2flux333.estimated_ode_params[1] #6.62813 Particles{Float64, 1}, 5.85327 Particles{Float64, 1}
+
+# # ----------------------------------------------------------
+# # physdt=1/20, full likelihood cdm
+# # 25 points 
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux1_cdm = solve(prob, alg)
+# sol2flux1_cdm.estimated_ode_params[1]# 6.50506 Particles{Float64, 1} ,6.38963 Particles{Float64, 1}
+# sol2flux2_cdm = solve(prob, alg)
+# sol2flux2_cdm.estimated_ode_params[1] #6.50032 Particles{Float64, 1} ,6.39817 Particles{Float64, 1}
+# sol2flux3_cdm = solve(prob, alg)
+# sol2flux3_cdm.estimated_ode_params[1] #6.50032 Particles{Float64, 1} ,6.36296 Particles{Float64, 1}
+
+# # 50 points 
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux11_cdm = solve(prob, alg)
+# sol2flux11_cdm.estimated_ode_params[1] #6.52951 Particles{Float64, 1},5.15621 Particles{Float64, 1}
+# sol2flux22_cdm = solve(prob, alg)
+# sol2flux22_cdm.estimated_ode_params[1] #6.54988 Particles{Float64, 1},5.16363 Particles{Float64, 1}
+# sol2flux33_cdm = solve(prob, alg)
+# sol2flux33_cdm.estimated_ode_params[1] #6.54988 Particles{Float64, 1},5.15591 Particles{Float64, 1}
+
+# # 100 points 
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux111_cdm = solve(prob, alg)
+# sol2flux111_cdm.estimated_ode_params[1] #6.74338 Particles{Float64, 1}, 9.72422 Particles{Float64, 1}
+# sol2flux222_cdm = solve(prob, alg)
+# sol2flux222_cdm.estimated_ode_params[1] #6.72642 Particles{Float64, 1}, 9.71991 Particles{Float64, 1}
+# sol2flux333_cdm = solve(prob, alg)
+# sol2flux333_cdm.estimated_ode_params[1] #6.72642 Particles{Float64, 1}, 9.75045 Particles{Float64, 1}
+
+# --------------------------------------------------------------------------------------
+#                              NEW SERIES OF TESTS (IN ORDER OF EXECUTION)
+#  -------------------------------------------------------------------------------------
+# original paper implementaion
+# 25 points
+ta = range(tspan[1], tspan[2], length = 25)
+u = [linear_analytic(u0, p, ti) for ti in ta]
+x̂ = collect(Float64, u .+ 0.05 * randn(size(u)))
+time = vec(collect(Float64, ta))
+dataset1 = [x̂, time]
+physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+# scatter!(time, u)
+# dataset
+# scatter!(dataset1[2], dataset1[1])
+# plot(time, physsol1)
+
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux1_normal = solve(prob, alg)
+sol2flux1_normal.estimated_ode_params[1]  #7.70593 Particles{Float64, 1}, 6.36096 Particles{Float64, 1} | 6.45865 Particles{Float64, 1}
+sol2flux2_normal = solve(prob, alg)
+sol2flux2_normal.estimated_ode_params[1] #6.66347 Particles{Float64, 1}, 6.36974 Particles{Float64, 1} | 6.45865 Particles{Float64, 1}
+sol2flux3_normal = solve(prob, alg)
+sol2flux3_normal.estimated_ode_params[1] #6.84827 Particles{Float64, 1}, 6.29555 Particles{Float64, 1} | 6.39947 Particles{Float64, 1}
+
+# 50 points
+ta = range(tspan[1], tspan[2], length = 50)
+u = [linear_analytic(u0, p, ti) for ti in ta]
+x̂ = collect(Float64, Array(u) + 0.05 * randn(size(u)))
+time = vec(collect(Float64, ta))
+dataset2 = [x̂, time]
+physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux11_normal = solve(prob, alg)
+sol2flux11_normal.estimated_ode_params[1] #7.83577 Particles{Float64, 1},6.24652 Particles{Float64, 1} | 6.34495 Particles{Float64, 1}
+sol2flux22_normal = solve(prob, alg)
+sol2flux22_normal.estimated_ode_params[1] #6.49477 Particles{Float64, 1},6.2118 Particles{Float64, 1} | 6.32476 Particles{Float64, 1}
+sol2flux33_normal = solve(prob, alg)
+sol2flux33_normal.estimated_ode_params[1] #6.47421 Particles{Float64, 1},6.33687 Particles{Float64, 1} | 6.2448 Particles{Float64, 1}
+
+# 100 points
+ta = range(tspan[1], tspan[2], length = 100)
+u = [linear_analytic(u0, p, ti) for ti in ta]
+x̂ = collect(Float64, Array(u) + 0.05 * randn(size(u)))
+time = vec(collect(Float64, ta))
+dataset3 = [x̂, time]
+physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux111_normal = solve(prob, alg)
+sol2flux111_normal.estimated_ode_params[1] #5.96604 Particles{Float64, 1},5.99588 Particles{Float64, 1} | 6.19805 Particles{Float64, 1}
+sol2flux222_normal = solve(prob, alg)
+sol2flux222_normal.estimated_ode_params[1] #6.05432 Particles{Float64, 1},6.0768 Particles{Float64, 1} | 6.22948 Particles{Float64, 1}
+sol2flux333_normal = solve(prob, alg)
+sol2flux333_normal.estimated_ode_params[1] #6.08856 Particles{Float64, 1},5.94819 Particles{Float64, 1} | 6.2551 Particles{Float64, 1}
+
+# LOTKA VOLTERRA CASE 
+function lotka_volterra(u, p, t)
+    # Model parameters.
+    α, β, γ, δ = p
+    # Current state.
+    x, y = u
+
+    # Evaluate differential equations.
+    dx = (α - β * y) * x # prey
+    dy = (δ * x - γ) * y # predator
+
+    return [dx, dy]
+end
+
+# initial-value problem.
+u01 = [1.0, 1.0]
+p1 = [1.5, 1.0, 3.0, 1.0]
+tspan1 = (0.0, 6.0)
+prob1 = ODEProblem(lotka_volterra, u01, tspan1, p1)
+
+# chainlux = Lux.Chain(Lux.Dense(1, 7, Lux.tanh), Lux.Dense(7, 7, Lux.tanh), Lux.Dense(7, 2))
+chainflux1 = Flux.Chain(Flux.Dense(1, 8, tanh), Flux.Dense(8, 8, tanh), Flux.Dense(8, 2))
+
+#testing timepoints must match keyword arg `saveat`` timepoints of solve() call
+t1 = collect(Float64, prob1.tspan[1]:(1 / 50.0):prob1.tspan[2])
+
+# --------------------------------------------------------------------------
+# original paper implementaion lotka volterra
+# 31 points  
+solution1 = solve(prob1, Tsit5(); saveat = 0.1)
+time1 = solution1.t
+physsol1_1 = solution1.u
+u1 = hcat(solution1.u...)
+x1 = u1[1, :] .+ 0.3 .* u1[1, :] .* randn(length(u1[1, :]))
+y1 = u1[2, :] .+ 0.3 .* u1[2, :] .* randn(length(u1[2, :]))
+dataset2_1 = [x1, y1, time1]
+plot(dataset2_1[end], dataset2_1[1])
+plot!(dataset2_1[end], dataset2_1[2])
+plot!(time1, u1[1, :])
+plot!(time1, u1[2, :])
+
+alg1 = NeuralPDE.BNNODE(chainflux1,
+    dataset = dataset2_1,
+    draw_samples = 1000,
+    physdt = 1 / 20.0,
+    l2std = [
+        0.2,
+        0.2,
+    ],
+    phystd = [
+        0.5,
+        0.5,
+    ],
+    priorsNNw = (0.0,
+        10.0),
+    param = [
+        Normal(4,
+            3),
+        Normal(-2,
+            4),
+        Normal(0,
+            5),
+        Normal(2.5,
+            2)],
+    n_leapfrog = 30, progress = true)
+
+# original paper (pure data 0 1)
+sol1flux1_lotka = solve(prob1, alg1)
+sol1flux1_lotka.estimated_ode_params
+# pure data method 1 1
+sol2flux1_lotka = solve(prob1, alg1)
+sol2flux1_lotka.estimated_ode_params
+# pure data method 1 0
+sol3flux1_lotka = solve(prob1, alg1)
+sol3flux1_lotka.estimated_ode_params
+# deri collocation
+sol4flux1_lotka = solve(prob1, alg1)
+sol4flux1_lotka.estimated_ode_params
+# collocation
+sol5flux1_lotka = solve(prob1, alg1)
+sol5flux1_lotka.estimated_ode_params
+# collocation + L2Data loss(at 9,0.5 1,2 gives same)
+sol6flux1_lotka = solve(prob1, alg1)
+sol6flux1_lotka.estimated_ode_params
+
+sol7flux1_lotka = solve(prob1, alg1)
+sol7flux1_lotka.estimated_ode_params
+
+using Plots, StatsPlots
+plot(dataset2_1[3], u1[1, :])
+plot!(dataset2_1[3], u1[2, :])
+plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol5flux1_normal.ensemblesol[2])
+plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]),
+    sol1flux1_normal.ensemblesol[1],
+    legend = :outerbottomleft)
+sol1flux2_normal = solve(prob1, alg1)
+sol1flux2_normal.estimated_ode_params  #|
+sol1flux3_normal = solve(prob1, alg1)
+sol1flux3_normal.estimated_ode_params  #|
+sol1flux4_normal = solve(prob1, alg1)
+sol1flux4_normal.estimated_ode_params
+
+plotly()
+plot!(title = "yuh")
+plot!(dataset2_1[3], dataset2_1[1])
+plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol1flux1_normal.ensemblesol[1])
+plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol1flux2_normal.ensemblesol[1])
+plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol1flux3_normal.ensemblesol[2])
+plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol1flux4_normal.ensemblesol[1])
+plot(time1, u1[1, :])
+plot!(time1, u1[2, :])
+
+ars = chainflux1(dataset2_1[end]')
+plot(ars[1, :])
+plot!(ars[2, :])
+
+function calculate_derivatives(dataset)
+    u = dataset[1]
+    u1 = dataset[2]
+    t = dataset[end]
+    # control points
+    n = Int(floor(length(t) / 10))
+    # spline for datasetvalues(solution) 
+    # interp = BSplineApprox(u, t, 4, 10, :Uniform, :Uniform)
+    interp = CubicSpline(u, t)
+    interp1 = CubicSpline(u1, t)
+    # derrivatives interpolation
+    dx = t[2] - t[1]
+    time = collect(t[1]:dx:t[end])
+    smoothu = [interp(i) for i in time]
+    smoothu1 = [interp1(i) for i in time]
+    # derivative of the spline (must match function derivative) 
+    û = tvdiff(smoothu, 20, 0.5, dx = dx, ε = 1)
+    û1 = tvdiff(smoothu1, 20, 0.5, dx = dx, ε = 1)
+    # tvdiff(smoothu, 100, 0.035, dx = dx, ε = 1)
+    # FDM
+    # û1 = diff(u) / dx
+    # dataset[1] and smoothu are almost equal(rounding errors)
+    return û, û1
+    # return 1
+end
+
+ar = calculate_derivatives(dataset2_1)
+plot(ar[1])
+plot!(ar[2])
+
+# 61 points
+solution1 = solve(prob1, Tsit5(); saveat = 0.1)
+time1 = solution1.t
+physsol1_1 = solution1.u
+u1 = hcat(solution1.u...)
+x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+dataset2_2 = [x1, y1, time1]
+
+alg1 = NeuralPDE.BNNODE(chainlux,
+    dataset = dataset2_2,
+    draw_samples = 1000,
+    l2std = [
+        0.1,
+        0.1,
+    ],
+    phystd = [
+        0.1,
+        0.1,
+    ],
+    priorsNNw = (0.0,
+        5.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol1flux11_normal = solve(prob1, alg1)
+sol1flux11_normal.estimated_ode_params #|
+sol1flux22_normal = solve(prob1, alg1)
+sol1flux22_normal.estimated_ode_params #|
+sol1flux33_normal = solve(prob1, alg1)
+sol1flux33_normal.estimated_ode_params #|
+
+# 121 points
+solution1 = solve(prob1, Tsit5(); saveat = 0.05)
+time1 = solution1.t
+physsol1_1 = solution1.u
+u1 = hcat(solution1.u...)
+x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+dataset2_3 = [x1, y1, time1]
+
+alg1 = NeuralPDE.BNNODE(chainlux,
+    dataset = dataset2_3,
+    draw_samples = 1000,
+    l2std = [
+        0.1,
+        0.1,
+    ],
+    phystd = [
+        0.1,
+        0.1,
+    ],
+    priorsNNw = (0.0,
+        5.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol1flux111_normal = solve(prob1, alg1)
+sol1flux111_normal.estimated_ode_params #|
+sol1flux222_normal = solve(prob1, alg1)
+sol1flux222_normal.estimated_ode_params #|
+sol1flux333_normal = solve(prob1, alg1)
+sol1flux333_normal.estimated_ode_params #| 
+
+# -------------------------------------------------------------------- 
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -640.4155412187399
+# Sampling 100%|███████████████████████████████| Time: 0:02:30
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -640.4155412187399
+# Sampling 100%|███████████████████████████████| Time: 0:01:54
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -640.4155412187399
+# Sampling 100%|███████████████████████████████| Time: 0:01:59
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1198.9147562830894
+# Sampling 100%|███████████████████████████████| Time: 0:02:44
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1198.9147562830894
+# Sampling 100%|███████████████████████████████| Time: 0:02:41
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1198.9147562830894
+# Sampling 100%|███████████████████████████████| Time: 0:02:41
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -2473.741390504424
+# Sampling 100%|███████████████████████████████| Time: 0:03:52
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -2473.741390504424
+# Sampling 100%|███████████████████████████████| Time: 0:03:49
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -2473.741390504424
+# Sampling 100%|███████████████████████████████| Time: 0:03:50
+
+# # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
+# physics Logpdf is : -6.659143464386241e7
+# prior Logpdf is : -150.30074579848434
+# L2lossData Logpdf is : -6.03075717462954e6
+# Sampling 100%|███████████████████████████████| Time: 0:04:54
+
+# physics Logpdf is : -8.70012053004202e8
+# prior Logpdf is : -150.3750892952511
+# L2lossData Logpdf is : -6.967914805207133e6
+# Sampling 100%|███████████████████████████████| Time: 0:05:09
+
+# physics Logpdf is : -5.417241281343099e7
+# prior Logpdf is : -150.52079555737976
+# L2lossData Logpdf is : -4.195953436792884e6
+# Sampling 100%|███████████████████████████████| Time: 0:05:01
+
+# physics Logpdf is : -4.579552981943833e8
+# prior Logpdf is : -150.30491731974283
+# L2lossData Logpdf is : -8.595475827260146e6
+# Sampling 100%|███████████████████████████████| Time: 0:06:08
+
+# physics Logpdf is : -1.989281834955769e7
+# prior Logpdf is : -150.16009042727543
+# L2lossData Logpdf is : -1.121270659669029e7
+# Sampling 100%|███████████████████████████████| Time: 0:05:38
+
+# physics Logpdf is : -8.683829147264534e8
+# prior Logpdf is : -150.37824872259102
+# L2lossData Logpdf is : -1.0887662888035845e7
+# Sampling 100%|███████████████████████████████| Time: 0:05:50
+
+# physics Logpdf is : -3.1944760610332566e8
+# prior Logpdf is : -150.33610348737565
+# L2lossData Logpdf is : -1.215458786744478e7
+# Sampling 100%|███████████████████████████████| Time: 0:10:50
+
+# physics Logpdf is : -3.2884572300341567e6
+# prior Logpdf is : -150.21002268156343
+# L2lossData Logpdf is : -1.102536731511176e7
+# Sampling 100%|███████████████████████████████| Time: 0:09:53
+
+# physics Logpdf is : -5.31293521002414e8
+# prior Logpdf is : -150.20948536040126
+# L2lossData Logpdf is : -1.818717239584132e7
+# Sampling 100%|███████████████████████████████| Time: 0:08:53
+
+# ----------------------------------------------------------
+# Full likelihood no l2 only new L22(NN gradients)
+# 25 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux1_new = solve(prob, alg)
+sol2flux1_new.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} | 6.21662 Particles{Float64, 1}
+sol2flux2_new = solve(prob, alg)
+sol2flux2_new.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1} | 7.14238 Particles{Float64, 1}
+sol2flux3_new = solve(prob, alg)
+sol2flux3_new.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1} | 6.79159 Particles{Float64, 1}
+
+# 50 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux11_new = solve(prob, alg)
+sol2flux11_new.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1} | 5.33467 Particles{Float64, 1}
+sol2flux22_new = solve(prob, alg)
+sol2flux22_new.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1} | 6.52419 Particles{Float64, 1}
+sol2flux33_new = solve(prob, alg)
+sol2flux33_new.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} | 5.36921 Particles{Float64, 1}
+
+# 100 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux111_new = solve(prob, alg)
+sol2flux111_new.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1} | 6.45333 Particles{Float64, 1}
+sol2flux222_new = solve(prob, alg)
+sol2flux222_new.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1} | 4.64417 Particles{Float64, 1}
+sol2flux333_new = solve(prob, alg)
+sol2flux333_new.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1} | 5.88037 Particles{Float64, 1}
+# ---------------------------------------------------------------------------
+
+# ----------------------------------------------------------
+# Full likelihood  l2 + new L22(NN gradients)
+# 25 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux1_new_all = solve(prob, alg)
+sol2flux1_new_all.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} | 6.4358 Particles{Float64, 1}
+sol2flux2_new_all = solve(prob, alg)
+sol2flux2_new_all.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1} | 6.52449 Particles{Float64, 1}
+sol2flux3_new_all = solve(prob, alg)
+sol2flux3_new_all.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1} | 6.34188 Particles{Float64, 1}
+
+# 50 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux11_new_all = solve(prob, alg)
+sol2flux11_new_all.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1} | 6.37889 Particles{Float64, 1}
+sol2flux22_new_all = solve(prob, alg)
+sol2flux22_new_all.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1} | 6.34747 Particles{Float64, 1}
+sol2flux33_new_all = solve(prob, alg)
+sol2flux33_new_all.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} | 6.39699 Particles{Float64, 1}
+
+# 100 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux111_new_all = solve(prob, alg)
+sol2flux111_new_all.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1} | 6.24327 Particles{Float64, 1}
+sol2flux222_new_all = solve(prob, alg)
+sol2flux222_new_all.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1} | 6.23928 Particles{Float64, 1}
+sol2flux333_new_all = solve(prob, alg)
+sol2flux333_new_all.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1} | 6.2145 Particles{Float64, 1}
+
+# ---------------------------------------------------------------------------
+# Full likelihood  l2 + new L22(dataset gradients) lotka volterra
+# 36 points 
+alg1 = NeuralPDE.BNNODE(chainlux,
+    dataset = dataset2_1,
+    draw_samples = 1000,
+    l2std = [
+        0.01,
+        0.01,
+    ],
+    phystd = [
+        0.01,
+        0.01,
+    ],
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol1flux1_new_all = solve(prob1, alg1)
+sol1flux1_new_all.estimated_ode_params[1]  #|
+sol1flux2_new_all = solve(prob1, alg1)
+sol1flux2_new_all.estimated_ode_params[1] #|
+sol1flux3_new_all = solve(prob1, alg1)
+sol1flux3_new_all.estimated_ode_params[1] #|
+
+# 61 points 
+alg1 = NeuralPDE.BNNODE(chainlux,
+    dataset = dataset2_2,
+    draw_samples = 1000,
+    l2std = [
+        0.01,
+        0.01,
+    ],
+    phystd = [
+        0.01,
+        0.01,
+    ],
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol1flux11_new_all = solve(prob1, alg1)
+sol1flux11_new_all.estimated_ode_params[1] #|
+sol1flux22_new_all = solve(prob1, alg1)
+sol1flux22_new_all.estimated_ode_params[1] #|
+sol1flux33_new_all = solve(prob1, alg1)
+sol1flux33_new_all.estimated_ode_params[1] #|
+
+# 121 points 
+alg1 = NeuralPDE.BNNODE(chainlux,
+    dataset = dataset2_3,
+    draw_samples = 1000,
+    l2std = [
+        0.01,
+        0.01,
+    ],
+    phystd = [
+        0.01,
+        0.01,
+    ],
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol1flux111_new_all = solve(prob1, alg1)
+sol1flux111_new_all.estimated_ode_params[1] #|
+sol1flux222_new_all = solve(prob1, alg1)
+sol1flux222_new_all.estimated_ode_params[1] #|
+sol1flux333_new_all = solve(prob1, alg1)
+sol1flux333_new_all.estimated_ode_params[1] #|
+# -------------------------------------------------------------------- 
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -640.4155412187399
+# L2loss2 Logpdf is : -757.9047847584478
+# Sampling 100%|███████████████████████████████| Time: 0:02:32
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -640.4155412187399
+# L2loss2 Logpdf is : -757.9047847584478
+# Sampling 100%|███████████████████████████████| Time: 0:02:19
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -640.4155412187399
+# L2loss2 Logpdf is : -757.9047847584478
+# Sampling 100%|███████████████████████████████| Time: 0:02:31
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1198.9147562830894
+# L2loss2 Logpdf is : -1517.3653615845183
+# Sampling 100%|███████████████████████████████| Time: 0:03:45
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1198.9147562830894
+# L2loss2 Logpdf is : -1517.3653615845183
+# Sampling 100%|███████████████████████████████| Time: 0:03:20
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1198.9147562830894
+# L2loss2 Logpdf is : -1517.3653615845183
+# Sampling 100%|███████████████████████████████| Time: 0:03:20
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -2473.741390504424
+# L2loss2 Logpdf is : -3037.8868319811254
+# Sampling 100%|███████████████████████████████| Time: 0:04:57
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -2473.741390504424
+# L2loss2 Logpdf is : -3037.8868319811254
+# Sampling 100%|███████████████████████████████| Time: 0:05:26
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -2473.741390504424
+# L2loss2 Logpdf is : -3037.8868319811254
+# Sampling 100%|███████████████████████████████| Time: 0:05:01
+
+# ----------------------------------------------------------
+# Full likelihood  l2 + new L22(dataset gradients)
+# 25 points
+# 1*,2*,  
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux1_newdata_all = solve(prob, alg)
+sol2flux1_newdata_all.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} | 5.73072 Particles{Float64, 1}
+sol2flux2_newdata_all = solve(prob, alg)
+sol2flux2_newdata_all.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1} | 5.71597 Particles{Float64, 1}
+sol2flux3_newdata_all = solve(prob, alg)
+sol2flux3_newdata_all.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1} | 5.7313 Particles{Float64, 1}
+
+# 50 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux11_newdata_all = solve(prob, alg)
+sol2flux11_newdata_all.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1} | 6.07153 Particles{Float64, 1}
+sol2flux22_newdata_all = solve(prob, alg)
+sol2flux22_newdata_all.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1} | 6.06623 Particles{Float64, 1}
+sol2flux33_newdata_all = solve(prob, alg)
+sol2flux33_newdata_all.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} | 6.12748 Particles{Float64, 1}
+
+# 100 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux111_newdata_all = solve(prob, alg)
+sol2flux111_newdata_all.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1} | 6.26222 Particles{Float64, 1}
+sol2flux222_newdata_all = solve(prob, alg)
+sol2flux222_newdata_all.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1} | 5.86494 Particles{Float64, 1}
+sol2flux333_newdata_all = solve(prob, alg)
+sol2flux333_newdata_all.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1} |  
+
+# ---------------------------------------------------------------------------
+
+# LOTKA VOLTERRA CASE
+using Plots, StatsPlots
+function lotka_volterra(u, p, t)
+    # Model parameters.
+    α, β, γ, δ = p
+    # Current state.
+    x, y = u
+
+    # Evaluate differential equations.
+    dx = (α - β * y) * x # prey
+    dy = (δ * x - γ) * y # predator
+
+    return [dx, dy]
+end
+
+# initial-value problem.
+u01 = [1.0, 1.0]
+p1 = [1.5, 1.0, 3.0, 1.0]
+tspan1 = (0.0, 6.0)
+prob1 = ODEProblem(lotka_volterra, u01, tspan1, p1)
+
+chainlux = Lux.Chain(Lux.Dense(1, 6, Lux.tanh), Lux.Dense(6, 6, Lux.tanh), Lux.Dense(6, 2))
+
+#testing timepoints must match keyword arg `saveat`` timepoints of solve() call
+t1 = collect(Float64, prob.tspan[1]:(1 / 50.0):prob.tspan[2])
+
+# --------------------------------------------------------------------------
+# original paper implementaion
+# 25 points  
+solution1 = solve(prob1, Tsit5(); saveat = 0.2)
+time1 = solution1.t
+physsol1_1 = solution1.u
+u1 = hcat(solution1.u...)
+x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+dataset2_1 = [x1, y1, time1]
+
+plot(time1, u1[1, :])
+plot!(time1, u1[2, :])
+scatter!(dataset2_1[3], dataset2_1[1])
+scatter!(dataset2_1[3], dataset2_1[2])
+
+alg1 = NeuralPDE.BNNODE(chainlux,
+    dataset = dataset2_1,
+    draw_samples = 1000,
+    l2std = [
+        0.01,
+        0.01,
+    ],
+    phystd = [
+        0.01,
+        0.01,
+    ],
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol1flux1_normal = solve(prob1, alg1)
+sol1flux1_normal.estimated_ode_params[1]  #|
+sol1flux2_normal = solve(prob1, alg1)
+sol1flux2_normal.estimated_ode_params[1] #|
+sol1flux3_normal = solve(prob1, alg1)
+sol1flux3_normal.estimated_ode_params[1] #|
+
+# 50 points
+solution1 = solve(prob1, Tsit5(); saveat = 0.05)
+time1 = solution1.t
+physsol1_1 = solution1.u
+u1 = hcat(solution1.u...)
+x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+dataset2_2 = [x1, y1, time1]
+
+alg1 = NeuralPDE.BNNODE(chainlux,
+    dataset = dataset2_2,
+    draw_samples = 1000,
+    l2std = [
+        0.01,
+        0.01,
+    ],
+    phystd = [
+        0.01,
+        0.01,
+    ],
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol1flux11_normal = solve(prob1, alg1)
+sol1flux11_normal.estimated_ode_params[1] #|
+sol1flux22_normal = solve(prob1, alg1)
+sol1flux22_normal.estimated_ode_params[1] #|
+sol1flux33_normal = solve(prob1, alg1)
+sol1flux33_normal.estimated_ode_params[1] #|
+
+# 100 points
+solution = solve(prob1, Tsit5(); saveat = 0.05)
+time1 = solution1.t
+physsol1_1 = solution1.u
+u1 = hcat(solution1.u...)
+x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+dataset2_3 = [x1, y1, time1]
+
+alg1 = NeuralPDE.BNNODE(chainlux,
+    dataset = dataset2_3,
+    draw_samples = 1000,
+    l2std = [
+        0.01,
+        0.01,
+    ],
+    phystd = [
+        0.01,
+        0.01,
+    ],
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol1flux111_normal = solve(prob1, alg1)
+sol1flux111_normal.estimated_ode_params[1] #|
+sol1flux222_normal = solve(prob1, alg1)
+sol1flux222_normal.estimated_ode_params[1] #|
+sol1flux333_normal = solve(prob1, alg1)
+sol1flux333_normal.estimated_ode_params[1] #|
+
+# --------------------------------------------------------------------
+
+# ----------------------------------------------------------
+# Full likelihood no l2 only new L22(NN gradients)
+# 25 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux1_new = solve(prob, alg)
+sol2flux1_new.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} |
+sol2flux2_new = solve(prob, alg)
+sol2flux2_new.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1}   |
+sol2flux3_new = solve(prob, alg)
+sol2flux3_new.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1}  |
+
+# 50 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux11_new = solve(prob, alg)
+sol2flux11_new.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1}   |
+sol2flux22_new = solve(prob, alg)
+sol2flux22_new.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1}   |
+sol2flux33_new = solve(prob, alg)
+sol2flux33_new.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} |
+
+# 100 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux111_new = solve(prob, alg)
+sol2flux111_new.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1}   |
+sol2flux222_new = solve(prob, alg)
+sol2flux222_new.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1}     |
+sol2flux333_new = solve(prob, alg)
+sol2flux333_new.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1}   |
+# ---------------------------------------------------------------------------
+
+# ----------------------------------------------------------
+# Full likelihood  l2 + new L22(NN gradients)
+# 25 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux1_new_all = solve(prob, alg)
+sol2flux1_new_all.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1}  |
+sol2flux2_new_all = solve(prob, alg)
+sol2flux2_new_all.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1}    |
+sol2flux3_new_all = solve(prob, alg)
+sol2flux3_new_all.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1}   |
+
+# 50 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux11_new_all = solve(prob, alg)
+sol2flux11_new_all.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1}   |
+sol2flux22_new_all = solve(prob, alg)
+sol2flux22_new_all.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1}   |
+sol2flux33_new_all = solve(prob, alg)
+sol2flux33_new_all.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} |
+# 100 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux111_new_all = solve(prob, alg)
+sol2flux111_new_all.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1}  |
+sol2flux222_new_all = solve(prob, alg)
+sol2flux222_new_all.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1}    |
+sol2flux333_new_all = solve(prob, alg)
+sol2flux333_new_all.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1}  |
+
+# ---------------------------------------------------------------------------
+
+# ----------------------------------------------------------
+# Full likelihood  l2 + new L22(dataset gradients)
+# 25 points 
+# *1,*2 vs *2.5
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol1flux1_newdata_all = solve(prob, alg)
+sol1flux1_newdata_all.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} |
+sol1flux2_newdata_all = solve(prob, alg)
+sol1flux2_newdata_all.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1}   |
+sol1flux3_newdata_all = solve(prob, alg)
+sol1flux3_newdata_all.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1}  |
+
+# 50 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol1flux11_newdata_all = solve(prob, alg)
+sol1flux11_newdata_all.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1}    |
+sol1flux22_newdata_all = solve(prob, alg)
+sol1flux22_newdata_all.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1}    |
+sol1flux33_newdata_all = solve(prob, alg)
+sol1flux33_newdata_all.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1}   |
+
+# 100 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol1flux111_newdata_all = solve(prob, alg)
+sol1flux111_newdata_all.estimated_ode_params[1]  #|
+sol1flux222_newdata_all = solve(prob, alg)
+sol1flux222_newdata_all.estimated_ode_params[1]  #|
+sol1flux333_newdata_all = solve(prob, alg)
+sol1flux333_newdata_all.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1}   |
+
+# ------------------------------------------------------------------------------------------------------------------------------
+
+# sol2flux111.estimated_ode_params[1]
+# # mine *5
+# 7.03386Particles{Float64, 1}
+# # normal
+# 6.38951Particles{Float64, 1}
+# 6.67657Particles{Float64, 1}
+# # mine *10
+# 7.53672Particles{Float64, 1}
+# # mine *2
+# 6.29005Particles{Float64, 1}
+# 6.29844Particles{Float64, 1}
+
+# # new mine *2
+# 6.39008Particles{Float64, 1}
+# 6.22071Particles{Float64, 1}
+# 6.15611Particles{Float64, 1}
+
+# # new mine *2 tvdiff(smoothu, 20, 0.035, dx = dx, ε = 1e-2)
+# 6.25549Particles{Float64, 1}
+# ----------------------------------------------------------
+
+# ---------------------------------------------------
+
+function calculate_derivatives1(dataset)
+    x̂, time = dataset
+    num_points = length(x̂)
+    # Initialize an array to store the derivative values.
+    derivatives = similar(x̂)
+
+    for i in 2:(num_points - 1)
+        # Calculate the first-order derivative using central differences.
+        Δt_forward = time[i + 1] - time[i]
+        Δt_backward = time[i] - time[i - 1]
+
+        derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
+
+        derivatives[i] = derivative
+    end
+
+    # Derivatives at the endpoints can be calculated using forward or backward differences.
+    derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
+    derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
+    return derivatives
+end
+
+function calculate_derivatives2(dataset)
+    u = dataset[1]
+    t = dataset[2]
+    # control points
+    n = Int(floor(length(t) / 10))
+    # spline for datasetvalues(solution) 
+    # interp = BSplineApprox(u, t, 4, 10, :Uniform, :Uniform)
+    interp = CubicSpline(u, t)
+    # derrivatives interpolation
+    dx = t[2] - t[1]
+    time = collect(t[1]:dx:t[end])
+    smoothu = [interp(i) for i in time]
+    # derivative of the spline (must match function derivative) 
+    û = tvdiff(smoothu, 20, 0.03, dx = dx, ε = 1)
+    # tvdiff(smoothu, 100, 0.1, dx = dx)
+    # 
+    # 
+    # FDM
+    û1 = diff(u) / dx
+    # dataset[1] and smoothu are almost equal(rounding errors)
+    return û, time, smoothu, û1
+end
+
+# need to do this for all datasets
+c = [linear(prob.u0, p, t) for t in dataset3[2]] #ideal case
+b = calculate_derivatives1(dataset2) #central diffs
+# a = calculate_derivatives2(dataset) #tvdiff(smoothu, 100, 0.1, dx = dx)
+d = calculate_derivatives2(dataset1) #tvdiff(smoothu, 20, 0.035, dx = dx, ε = 1e-2)
+d = calculate_derivatives2(dataset2)
+d = calculate_derivatives2(dataset3)
+mean(abs2.(c .- b))
+mean(abs2.(c .- d[1]))
+loss(model, x, y) = mean(abs2.(model(x) .- y));
+scatter!(prob.u0 .+ (prob.tspan[2] .- dataset3[2]) .* chainflux1(dataset3[2]')')
+loss(chainflux1, dataset3[2]', dataset3[1]')
+# mean(abs2.(c[1:24] .- a[4]))
+plot(c, label = "ideal deriv")
+plot!(b, label = "Centraldiff deriv")
+# plot!(a[1], label = "tvdiff(0.1,def) derivatives") 
+plot!(d[1], label = "tvdiff(0.035,20) derivatives")
+plotly()
+
+# GridTraining , NoiseRobustDiff dataset[2][2]-dataset[2][1] l2std
+# 25 points 
+ta = range(tspan[1], tspan[2], length = 25)
+u = [linear_analytic(u0, p, ti) for ti in ta]
+x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+time = vec(collect(Float64, ta))
+dataset = [x̂, time]
+physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+time1 = collect(tspan[1]:(1 / 50.0):tspan[2])
+physsol = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
+plot(physsol, label = "solution")
+
+# plots from 32(deriv)
+# for d
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+    draw_samples = 2000, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+n2_sol2flux1 = solve(prob, alg)
+n2_sol2flux1.estimated_ode_params[1]
+# with extra likelihood 
+# 10.2011Particles{Float64, 1}
+
+# without extra likelihood 
+# 6.25791Particles{Float64, 1}
+# 6.29539Particles{Float64, 1}
+
+plot!(n2_sol2flux1.ensemblesol[1], label = "tvdiff(0.035,1) derivpar")
+plot(dataset[1])
+plot!(physsol1)
+# for a
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+    draw_samples = 2000, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+n2_sol2flux2 = solve(prob, alg)
+n2_sol2flux2.estimated_ode_params[1]
+# with extra likelihood
+# 8.73602Particles{Float64, 1}
+# without extra likelihood
+
+plot!(n2_sol2flux2.ensemblesol[1],
+    label = "tvdiff(0.1,def) derivatives",
+    legend = :outerbottomleft)
+
+# for b
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+    draw_samples = 2000, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+n2_sol2flux3 = solve(prob, alg)
+n2_sol2flux3.estimated_ode_params[1]
+plot!(n2_sol2flux3.ensemblesol[1], label = "Centraldiff deriv")
+
+# for c
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+    draw_samples = 2000, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+n2_sol2flux4 = solve(prob, alg)
+n2_sol2flux4.estimated_ode_params[1]
+plot!(n2_sol2flux4.ensemblesol[1], label = "ideal deriv")
+
+# 50 points 
+
+ta = range(tspan[1], tspan[2], length = 50)
+u = [linear_analytic(u0, p, ti) for ti in ta]
+x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+time = vec(collect(Float64, ta))
+dataset = [x̂, time]
+physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+n2_sol2flux11 = solve(prob, alg)
+n2_sol2flux11.estimated_ode_params[1]
+
+# 5.90049Particles{Float64, 1}
+# 100 points
+ta = range(tspan[1], tspan[2], length = 100)
+u = [linear_analytic(u0, p, ti) for ti in ta]
+x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+time = vec(collect(Float64, ta))
+dataset = [x̂, time]
+physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+n2_sol2flux111 = solve(prob, alg)
+n2_sol2flux111.estimated_ode_params[1]
+plot!(n2_sol2flux111.ensemblesol[1])
+8.88555Particles{Float64, 1}
+
+# 7.15353Particles{Float64, 1}
+# 6.21059 Particles{Float64, 1}
+# 6.31836Particles{Float64, 1}
+0.1 * p
+# ----------------------------------------------------------
+
+# Gives the linear interpolation value at t=3.5
+
+# # Problem 1 with param esimation
+# # dataset 0-1 2 percent noise
+# p = 6.283185307179586
+# # partial_logdensity
+# 6.3549Particles{Float64, 1}
+# # full log_density
+# 6.34667Particles{Float64, 1}
+
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2lux.estimated_ode_params[1]
+
+# # dataset 0-1 20 percent noise
+# # partial log_density
+# 6.30244Particles{Float64, 1}
+# # full log_density
+# 6.24637Particles{Float64, 1}
+
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # dataset 0-2 20percent noise
+# # partial log_density
+# 6.24948Particles{Float64, 1}
+# # full log_density
+# 6.26095Particles{Float64, 1}
+
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# linear_analytic = (u0, p, t) -> u0 + sin(p * t) / (p)
+# linear = (u, p, t) -> cos(p * t)
+# tspan = (0.0, 2.0)
+
+# # dataset 0-1 2 percent noise
+# p = 6.283185307179586
+# # partial_logdensity
+# 6.3549Particles{Float64, 1}
+# # full log_density
+# 6.34667Particles{Float64, 1}
+
+# # dataset 0-1 20 percent noise
+# # partial log_density
+# 6.30244Particles{Float64, 1}
+# # full log_density
+# 6.24637Particles{Float64, 1}
+
+# # dataset 0-2 20percent noise
+# # partial log_density
+# 6.24948Particles{Float64, 1}
+# # full log_density
+# 6.26095Particles{Float64, 1}
+
+# # dataset 0-2 20percent noise 50 points(above all are 100 points)
+# # FuLL log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # partial log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# # i kinda win on 25 points again
+# # dataset 0-2 20percent noise 25 points
+# # FuLL log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # partial log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # i win with 25 points
+# # dataset 0-1 20percent noise 25 points
+# # FuLL log_density
+# sol2flux.estimated_ode_params[1]
+# # new
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # partial log_density
+# sol2flux.estimated_ode_params[1]
+# # New
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # (9,2.5)(above are (9,0.5))
+# # FuLL log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# # just prev was repeat(just change)
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # partial log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # i lose on 0-1,50 points
+# # dataset 0-1 20percent noise 50 points
+# # FuLL log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# # partial log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # (9,2.5) (above are (9,0.5))
+# # FuLL log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # partial log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # ----------------------------------------------------------
+# # Problem 1 with param estimation
+# # physdt=1/20, Full likelihood new 0.5*l2std
+# # 25 points
+# ta = range(tspan[1], tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n05_sol2flux1 = solve(prob, alg)
+# n05_sol2flux1.estimated_ode_params[1] #6.90953 Particles{Float64, 1}
+# n05_sol2flux2 = solve(prob, alg)
+# n05_sol2flux2.estimated_ode_params[1] #6.82374 Particles{Float64, 1}
+# n05_sol2flux3 = solve(prob, alg)
+# n05_sol2flux3.estimated_ode_params[1] #6.84465 Particles{Float64, 1}
+
+# using Plots, StatsPlots
+# plot(n05_sol2flux3.ensemblesol[1])
+# plot!(physsol1)
+# # 50 points
+# ta = range(tspan[1], tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n05_sol2flux11 = solve(prob, alg)
+# n05_sol2flux11.estimated_ode_params[1] #7.0262 Particles{Float64, 1}
+# n05_sol2flux22 = solve(prob, alg)
+# n05_sol2flux22.estimated_ode_params[1] #5.56438 Particles{Float64, 1}
+# n05_sol2flux33 = solve(prob, alg)
+# n05_sol2flux33.estimated_ode_params[1] #7.27189 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n05_sol2flux111 = solve(prob, alg)
+# n05_sol2flux111.estimated_ode_params[1] #6.90549 Particles{Float64, 1}
+# n05_sol2flux222 = solve(prob, alg)
+# n05_sol2flux222.estimated_ode_params[1] #5.42436 Particles{Float64, 1}
+# n05_sol2flux333 = solve(prob, alg)
+# n05_sol2flux333.estimated_ode_params[1] #6.05832 Particles{Float64, 1}
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood new 2*l2std
+# # 25 points
+# ta = range(tspan[1], tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n2_sol2flux1 = solve(prob, alg)
+# n2_sol2flux1.estimated_ode_params[1]#6.9087 Particles{Float64, 1}
+# n2_sol2flux2 = solve(prob, alg)
+# n2_sol2flux2.estimated_ode_params[1]#6.86507 Particles{Float64, 1}
+# n2_sol2flux3 = solve(prob, alg)
+# n2_sol2flux3.estimated_ode_params[1]#6.59206 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(tspan[1], tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n2_sol2flux11 = solve(prob, alg)
+# n2_sol2flux11.estimated_ode_params[1]#7.3715 Particles{Float64, 1}
+# n2_sol2flux22 = solve(prob, alg)
+# n2_sol2flux22.estimated_ode_params[1]#9.84477 Particles{Float64, 1}
+# n2_sol2flux33 = solve(prob, alg)
+# n2_sol2flux33.estimated_ode_params[1]#6.87107 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n2_sol2flux111 = solve(prob, alg)
+# n2_sol2flux111.estimated_ode_params[1]#6.60739 Particles{Float64, 1}
+# n2_sol2flux222 = solve(prob, alg)
+# n2_sol2flux222.estimated_ode_params[1]#7.05923 Particles{Float64, 1}
+# n2_sol2flux333 = solve(prob, alg)
+# n2_sol2flux333.estimated_ode_params[1]#6.5017 Particles{Float64, 1}
+
+# # ----------------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood new all 2*l2std
+# # 25 points
+# ta = range(tspan[1], tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n2all5sol2flux1 = solve(prob, alg)
+# n2all5sol2flux1.estimated_ode_params[1]#11.3659 Particles{Float64, 1}
+# n2all5sol2flux2 = solve(prob, alg)
+# n2all5sol2flux2.estimated_ode_params[1]#6.65634 Particles{Float64, 1}
+# n2all5sol2flux3 = solve(prob, alg)
+# n2all5sol2flux3.estimated_ode_params[1]#6.61905 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(tspan[1], tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n2all5sol2flux11 = solve(prob, alg)
+# n2all5sol2flux11.estimated_ode_params[1]#6.27555 Particles{Float64, 1}
+# n2all5sol2flux22 = solve(prob, alg)
+# n2all5sol2flux22.estimated_ode_params[1]#6.24352 Particles{Float64, 1}
+# n2all5sol2flux33 = solve(prob, alg)
+# n2all5sol2flux33.estimated_ode_params[1]#6.33723 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n2all5sol2flux111 = solve(prob, alg)
+# n2all5sol2flux111.estimated_ode_params[1] #5.95535 Particles{Float64, 1}
+# n2all5sol2flux222 = solve(prob, alg)
+# n2all5sol2flux222.estimated_ode_params[1] #5.98301 Particles{Float64, 1}
+# n2all5sol2flux333 = solve(prob, alg)
+# n2all5sol2flux333.estimated_ode_params[1] #5.9081 Particles{Float64, 1}
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood new all (l2+l22)
+# # 25 points
+# ta = range(tspan[1], tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# nall5sol2flux1 = solve(prob, alg)
+# nall5sol2flux1.estimated_ode_params[1]#6.54705 Particles{Float64, 1}
+# nall5sol2flux2 = solve(prob, alg)
+# nall5sol2flux2.estimated_ode_params[1]#6.6967 Particles{Float64, 1}
+# nall5sol2flux3 = solve(prob, alg)
+# nall5sol2flux3.estimated_ode_params[1]#6.47173 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(tspan[1], tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# nall5sol2flux11 = solve(prob, alg)
+# nall5sol2flux11.estimated_ode_params[1]#6.2113 Particles{Float64, 1}
+# nall5sol2flux22 = solve(prob, alg)
+# nall5sol2flux22.estimated_ode_params[1]#6.10675 Particles{Float64, 1}
+# nall5sol2flux33 = solve(prob, alg)
+# nall5sol2flux33.estimated_ode_params[1]#6.11541 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# nall5sol2flux111 = solve(prob, alg)
+# nall5sol2flux111.estimated_ode_params[1]#6.35224 Particles{Float64, 1}
+# nall5sol2flux222 = solve(prob, alg)
+# nall5sol2flux222.estimated_ode_params[1]#6.40542 Particles{Float64, 1}
+# nall5sol2flux333 = solve(prob, alg)
+# nall5sol2flux333.estimated_ode_params[1]#6.44206 Particles{Float64, 1}
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood new 5* (new only l22 mod)
+# # 25 points
+# ta = range(tspan[1], tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n5sol2flux1 = solve(prob, alg)
+# n5sol2flux1.estimated_ode_params[1]#7.05077 Particles{Float64, 1}
+# n5sol2flux2 = solve(prob, alg)
+# n5sol2flux2.estimated_ode_params[1]#7.07303 Particles{Float64, 1}
+# n5sol2flux3 = solve(prob, alg)
+# n5sol2flux3.estimated_ode_params[1]#5.10622 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(tspan[1], tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n5sol2flux11 = solve(prob, alg)
+# n5sol2flux11.estimated_ode_params[1]#7.39852 Particles{Float64, 1}
+# n5sol2flux22 = solve(prob, alg)
+# n5sol2flux22.estimated_ode_params[1]#7.30319 Particles{Float64, 1}
+# n5sol2flux33 = solve(prob, alg)
+# n5sol2flux33.estimated_ode_params[1]#6.73722 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n5sol2flux111 = solve(prob, alg)
+# n5sol2flux111.estimated_ode_params[1]#7.15996 Particles{Float64, 1}
+# n5sol2flux222 = solve(prob, alg)
+# n5sol2flux222.estimated_ode_params[1]#7.02949 Particles{Float64, 1}
+# n5sol2flux333 = solve(prob, alg)
+# n5sol2flux333.estimated_ode_params[1]#6.9393 Particles{Float64, 1}
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood new
+# # 25 points
+# ta = range(tspan[1], tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# nsol2flux1 = solve(prob, alg)
+# nsol2flux1.estimated_ode_params[1] #5.82707 Particles{Float64, 1}
+# nsol2flux2 = solve(prob, alg)
+# nsol2flux2.estimated_ode_params[1] #4.81534 Particles{Float64, 1}
+# nsol2flux3 = solve(prob, alg)
+# nsol2flux3.estimated_ode_params[1] #5.52965 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(tspan[1], tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# nsol2flux11 = solve(prob, alg)
+# nsol2flux11.estimated_ode_params[1] #7.04027 Particles{Float64, 1}
+# nsol2flux22 = solve(prob, alg)
+# nsol2flux22.estimated_ode_params[1] #7.17588 Particles{Float64, 1}
+# nsol2flux33 = solve(prob, alg)
+# nsol2flux33.estimated_ode_params[1] #6.94495 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# nsol2flux111 = solve(prob, alg)
+# nsol2flux111.estimated_ode_params[1] #6.06608 Particles{Float64, 1}
+# nsol2flux222 = solve(prob, alg)
+# nsol2flux222.estimated_ode_params[1] #6.84726 Particles{Float64, 1}
+# nsol2flux333 = solve(prob, alg)
+# nsol2flux333.estimated_ode_params[1] #6.83463 Particles{Float64, 1}
+
+# # ----------------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood
+# # 25 points
+# ta = range(tspan[1], tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux1 = solve(prob, alg)
+# sol2flux1.estimated_ode_params[1] #6.71397 Particles{Float64, 1} 6.37604 Particles{Float64, 1}
+# sol2flux2 = solve(prob, alg)
+# sol2flux2.estimated_ode_params[1] #6.73509 Particles{Float64, 1} 6.21692 Particles{Float64, 1}
+# sol2flux3 = solve(prob, alg)
+# sol2flux3.estimated_ode_params[1] #6.65453 Particles{Float64, 1} 6.23153 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(tspan[1], tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux11 = solve(prob, alg)
+# sol2flux11.estimated_ode_params[1] #6.23443 Particles{Float64, 1} 6.30635 Particles{Float64, 1}
+# sol2flux22 = solve(prob, alg)
+# sol2flux22.estimated_ode_params[1] #6.18879 Particles{Float64, 1} 6.30099 Particles{Float64, 1}
+# sol2flux33 = solve(prob, alg)
+# sol2flux33.estimated_ode_params[1] #6.22773 Particles{Float64, 1} 6.30671 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux111 = solve(prob, alg)
+# sol2flux111.estimated_ode_params[1] #6.15832 Particles{Float64, 1} 6.35453 Particles{Float64, 1}
+# sol2flux222 = solve(prob, alg)
+# sol2flux222.estimated_ode_params[1] #6.16968 Particles{Float64, 1}6.31125 Particles{Float64, 1}
+# sol2flux333 = solve(prob, alg)
+# sol2flux333.estimated_ode_params[1] #6.12466 Particles{Float64, 1} 6.26514 Particles{Float64, 1}
+
+# # ----------------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, partial likelihood
+# # 25 points
+# ta = range(tspan[1], tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux1_p = solve(prob, alg)
+# sol2flux1_p.estimated_ode_params[1] #5.74065 Particles{Float64, 1} #6.83683 Particles{Float64, 1}
+# sol2flux2_p = solve(prob, alg)
+# sol2flux2_p.estimated_ode_params[1] #9.82504 Particles{Float64, 1} #6.14568 Particles{Float64, 1}
+# sol2flux3_p = solve(prob, alg)
+# sol2flux3_p.estimated_ode_params[1] #5.75075 Particles{Float64, 1} #6.08579 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(tspan[1], tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux11_p = solve(prob, alg)
+# sol2flux11_p.estimated_ode_params[1] #6.19414 Particles{Float64, 1} #6.04621 Particles{Float64, 1}
+# sol2flux22_p = solve(prob, alg)
+# sol2flux22_p.estimated_ode_params[1] #6.15227 Particles{Float64, 1} #6.29086 Particles{Float64, 1}
+# sol2flux33_p = solve(prob, alg)
+# sol2flux33_p.estimated_ode_params[1] #6.19048 Particles{Float64, 1} #6.12516 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux111_p = solve(prob, alg)
+# sol2flux111_p.estimated_ode_params[1] #6.51608 Particles{Float64, 1}# 6.42945Particles{Float64, 1}
+# sol2flux222_p = solve(prob, alg)
+# sol2flux222_p.estimated_ode_params[1] #6.4875 Particles{Float64, 1} # 6.44524Particles{Float64, 1}
+# sol2flux333_p = solve(prob, alg)
+# sol2flux333_p.estimated_ode_params[1] #6.51679 Particles{Float64, 1}# 6.43152Particles{Float64, 1}
+
+# # ---------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood, dataset(1.0-2.0)
+# # 25 points
+# ta = range(1.0, tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1flux1 = solve(prob, alg)
+# sol1flux1.estimated_ode_params[1] #6.35164 Particles{Float64, 1}
+# sol1flux2 = solve(prob, alg)
+# sol1flux2.estimated_ode_params[1] #6.30919 Particles{Float64, 1}
+# sol1flux3 = solve(prob, alg)
+# sol1flux3.estimated_ode_params[1] #6.33554 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(1.0, tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1flux11 = solve(prob, alg)
+# sol1flux11.estimated_ode_params[1] #6.39769 Particles{Float64, 1}
+# sol1flux22 = solve(prob, alg)
+# sol1flux22.estimated_ode_params[1] #6.43924 Particles{Float64, 1}
+# sol1flux33 = solve(prob, alg)
+# sol1flux33.estimated_ode_params[1] #6.4697 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(1.0, tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1flux111 = solve(prob, alg)
+# sol1flux111.estimated_ode_params[1] #6.27812 Particles{Float64, 1}
+# sol1flux222 = solve(prob, alg)
+# sol1flux222.estimated_ode_params[1] #6.19278 Particles{Float64, 1}
+# sol1flux333 = solve(prob, alg)
+# sol1flux333.estimated_ode_params[1] # 9.68244Particles{Float64, 1} (first try) # 6.23969 Particles{Float64, 1}(second try)
+
+# # ----------------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, partial likelihood, dataset(1.0-2.0)
+# # 25 points
+# ta = range(1.0, tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1flux1_p = solve(prob, alg)
+# sol1flux1_p.estimated_ode_params[1]#6.36269 Particles{Float64, 1}
+
+# sol1flux2_p = solve(prob, alg)
+# sol1flux2_p.estimated_ode_params[1]#6.34685 Particles{Float64, 1}
+
+# sol1flux3_p = solve(prob, alg)
+# sol1flux3_p.estimated_ode_params[1]#6.31421 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(1.0, tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1flux11_p = solve(prob, alg)
+# sol1flux11_p.estimated_ode_params[1] #6.15725 Particles{Float64, 1}
+
+# sol1flux22_p = solve(prob, alg)
+# sol1flux22_p.estimated_ode_params[1] #6.18145 Particles{Float64, 1}
+
+# sol1flux33_p = solve(prob, alg)
+# sol1flux33_p.estimated_ode_params[1] #6.21905 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(1.0, tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1flux111_p = solve(prob, alg)
+# sol1flux111_p.estimated_ode_params[1]#6.13481 Particles{Float64, 1}
+
+# sol1flux222_p = solve(prob, alg)
+# sol1flux222_p.estimated_ode_params[1]#9.68555 Particles{Float64, 1}
+
+# sol1flux333_p = solve(prob, alg)
+# sol1flux333_p.estimated_ode_params[1]#6.1477 Particles{Float64, 1}
+
+# # -----------------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, partial likelihood, dataset(1-2), again but different density
+# # 12 points
+# ta = range(1.0, tspan[2], length = 12)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol3flux1_p = solve(prob, alg)
+# sol3flux1_p.estimated_ode_params[1]#6.50048 Particles{Float64, 1}
+# sol3flux2_p = solve(prob, alg)
+# sol3flux2_p.estimated_ode_params[1]#6.57597 Particles{Float64, 1}
+# sol3flux3_p = solve(prob, alg)
+# sol3flux3_p.estimated_ode_params[1]#6.24487 Particles{Float64, 1}
+
+# # 25 points
+# ta = range(1.0, tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol3flux11_p = solve(prob, alg)
+# sol3flux11_p.estimated_ode_params[1]#6.53093 Particles{Float64, 1}
+
+# sol3flux22_p = solve(prob, alg)
+# sol3flux22_p.estimated_ode_params[1]#6.32744 Particles{Float64, 1}
+
+# sol3flux33_p = solve(prob, alg)
+# sol3flux33_p.estimated_ode_params[1]#6.49175 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(1.0, tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol3flux111_p = solve(prob, alg)
+# sol3flux111_p.estimated_ode_params[1]#6.4455 Particles{Float64, 1}
+# sol3flux222_p = solve(prob, alg)
+# sol3flux222_p.estimated_ode_params[1]#6.40736 Particles{Float64, 1}
+# sol3flux333_p = solve(prob, alg)
+# sol3flux333_p.estimated_ode_params[1]#6.46214 Particles{Float64, 1}
+
+# # ---------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, partial likelihood, dataset(0-1)
+# # 25 points
+# ta = range(tspan[1], 1.0, length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol0flux1_p = solve(prob, alg)
+# sol0flux1_p.estimated_ode_params[1]#7.12625 Particles{Float64, 1}
+# sol0flux2_p = solve(prob, alg)
+# sol0flux2_p.estimated_ode_params[1]#8.40948 Particles{Float64, 1}
+# sol0flux3_p = solve(prob, alg)
+# sol0flux3_p.estimated_ode_params[1]#7.18768 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(tspan[1], 1.0, length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol0flux11_p = solve(prob, alg)
+# sol0flux11_p.estimated_ode_params[1]#6.23707 Particles{Float64, 1}
+# sol0flux22_p = solve(prob, alg)
+# sol0flux22_p.estimated_ode_params[1]#6.09728 Particles{Float64, 1}
+# sol0flux33_p = solve(prob, alg)
+# sol0flux33_p.estimated_ode_params[1]#6.12971 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], 1.0, length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol0flux111_p = solve(prob, alg)
+# sol0flux111_p.estimated_ode_params[1]#5.99039 Particles{Float64, 1}
+# sol0flux222_p = solve(prob, alg)
+# sol0flux222_p.estimated_ode_params[1]#5.89609 Particles{Float64, 1}
+# sol0flux333_p = solve(prob, alg)
+# sol0flux333_p.estimated_ode_params[1]#5.91923 Particles{Float64, 1}
+
+# # ---------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood, dataset(1.0-2.0), Normal(12,5) distri prior
+# # 25 points
+# ta = range(1.0, tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 6.0),
+#     param = [Normal(12, 5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1f1 = solve(prob, alg)
+# sol1f1.estimated_ode_params[1]
+# # 10.9818Particles{Float64, 1}
+# sol1f2 = solve(prob, alg)
+# sol1f2.estimated_ode_params[1]
+# # sol1f3 = solve(prob, alg)
+# # sol1f3.estimated_ode_params[1]
+
+# # 50 points
+# ta = range(1.0, tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 6.0),
+#     param = [Normal(12, 5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1f11 = solve(prob, alg)
+# sol1f11.estimated_ode_params[1]
+# sol1f22 = solve(prob, alg)
+# sol1f22.estimated_ode_params[1]
+# # sol1f33 = solve(prob, alg)
+# # sol1f33.estimated_ode_params[1]
+
+# # 100 points
+# ta = range(1.0, tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 6.0),
+#     param = [Normal(12, 5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1f111 = solve(prob, alg)
+# sol1f111.estimated_ode_params[1]
+# sol1f222 = solve(prob, alg)
+# sol1f222.estimated_ode_params[1]
+# # sol1f333 = solve(prob, alg)
+# # sol1f333.estimated_ode_params[1]
+
+# # ----------------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, partial likelihood, dataset(1.0-2.0), Normal(12,5) distri prior
+# # 25 points
+# ta = range(1.0, tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [Normal(12, 5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1f1_p = solve(prob, alg)
+# sol1f1_p.estimated_ode_params[1]
+# sol1f2_p = solve(prob, alg)
+# sol1f2_p.estimated_ode_params[1]
+# sol1f3_p = solve(prob, alg)
+# sol1f3_p.estimated_ode_params[1]
+
+# # 50 points
+# ta = range(1.0, tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [Normal(12, 5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1f11_p = solve(prob, alg)
+# sol1f11_p.estimated_ode_params[1]
+# sol1f22_p = solve(prob, alg)
+# sol1f22_p.estimated_ode_params[1]
+# sol1f33_p = solve(prob, alg)
+# sol1f33_p.estimated_ode_params[1]
+
+# # 100 points
+# ta = range(1.0, tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [Normal(12, 5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1f111_p = solve(prob, alg)
+# sol1f111_p.estimated_ode_params[1]
+# sol1f222_p = solve(prob, alg)
+# sol1f222_p.estimated_ode_params[1]
+# sol1f333_p = solve(prob, alg)
+# sol1f333_p.estimated_ode_params[1]
+
+# # ----------------------------------------------------------
+
+# plot!(title = "9,2.5 50 training 2>full,1>partial")
+
+# p
+# param1
+# # (lux chain)
+# @prob mean(abs.(physsol2 .- sol3lux_pestim.ensemblesol[1])) < 8e-2
+
+# # estimated parameters(lux chain)
+# param1 = sol3lux_pestim.estimated_ode_params[1]
+# @test abs(param1 - p) < abs(0.35 * p)
+
+# p
+# param1
+
+# # # my suggested Loss likelihood part
+# # #  + L2loss2(Tar, θ)
+# # # My suggested extra loss function
+# # function L2loss2(Tar::LogTargetDensity, θ)
+# #     f = Tar.prob.f
+
+# #     # parameter estimation chosen or not
+# #     if Tar.extraparams > 0
+# #         dataset = Tar.dataset
+
+# #         # Timepoints to enforce Physics
+# #         dataset = Array(reduce(hcat, dataset)')
+# #         t = dataset[end, :]
+# #         û = dataset[1:(end - 1), :]
+
+# #         ode_params = Tar.extraparams == 1 ?
+# #                      θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
+# #                      θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
+
+# #         if length(û[:, 1]) == 1
+# #             physsol = [f(û[:, i][1],
+# #                 ode_params,
+# #                 t[i])
+# #                        for i in 1:length(û[1, :])]
+# #         else
+# #             physsol = [f(û[:, i],
+# #                 ode_params,
+# #                 t[i])
+# #                        for i in 1:length(û[1, :])]
+# #         end
+# #         #form of NN output matrix output dim x n
+# #         deri_physsol = reduce(hcat, physsol)
+
+# #         #   OG deriv(basically gradient matching in case of an ODEFunction)
+# #         # in case of PDE or general ODE we would want to reduce residue of f(du,u,p,t)
+# #         # if length(û[:, 1]) == 1
+# #         #     deri_sol = [f(û[:, i][1],
+# #         #         Tar.prob.p,
+# #         #         t[i])
+# #         #                 for i in 1:length(û[1, :])]
+# #         # else
+# #         #     deri_sol = [f(û[:, i],
+# #         #         Tar.prob.p,
+# #         #         t[i])
+# #         #                 for i in 1:length(û[1, :])]
+# #         # end
+# #         # deri_sol = reduce(hcat, deri_sol)
+# #         derivatives = calculate_derivatives(Tar.dataset)
+# #         deri_sol = reduce(hcat, derivatives)
+
+# #         physlogprob = 0
+# #         for i in 1:length(Tar.prob.u0)
+# #             # can add phystd[i] for u[i]
+# #             physlogprob += logpdf(MvNormal(deri_physsol[i, :],
+# #                     LinearAlgebra.Diagonal(map(abs2,
+# #                         Tar.l2std[i] .*
+# #                         ones(length(deri_sol[i, :]))))),
+# #                 deri_sol[i, :])
+# #         end
+# #         return physlogprob
+# #     else
+# #         return 0
+# #     end
+# # end
+
+# # function calculate_derivatives(dataset)
+# #     x̂, time = dataset
+# #     num_points = length(x̂)
+
+# #     # Initialize an array to store the derivative values.
+# #     derivatives = similar(x̂)
+
+# #     for i in 2:(num_points - 1)
+# #         # Calculate the first-order derivative using central differences.
+# #         Δt_forward = time[i + 1] - time[i]
+# #         Δt_backward = time[i] - time[i - 1]
+
+# #         derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
+
+# #         derivatives[i] = derivative
+# #     end
+
+# #     # Derivatives at the endpoints can be calculated using forward or backward differences.
+# #     derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
+# #     derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
+
+# #     return derivatives
+# # end
+
+# size(dataset[1])
+# # Problem 1 with param estimation(flux,lux)
+# # Normal
+# # 6.20311 Particles{Float64, 1},6.21746Particles{Float64, 1}
+# # better
+# # 6.29093Particles{Float64, 1}, 6.27925Particles{Float64, 1}
+# # Non ideal case
+# # 6.14861Particles{Float64, 1}, 
+# sol2flux.estimated_ode_params
+# sol2lux.estimated_ode_params[1]
+# p
+# size(sol3flux_pestim.ensemblesol[2])
+# plott = sol3flux_pestim.ensemblesol[1]
+# using StatsPlots
+# plotly()
+# plot(t, sol3flux_pestim.ensemblesol[1])
+
+# function calculate_derivatives(dataset)
+#     x̂, time = dataset
+#     num_points = length(x̂)
+
+#     # Initialize an array to store the derivative values.
+#     derivatives = similar(x̂)
+
+#     for i in 2:(num_points - 1)
+#         # Calculate the first-order derivative using central differences.
+#         Δt_forward = time[i + 1] - time[i]
+#         Δt_backward = time[i] - time[i - 1]
+
+#         derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
+
+#         derivatives[i] = derivative
+#     end
+
+#     # Derivatives at the endpoints can be calculated using forward or backward differences.
+#     derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
+#     derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
+
+#     return derivatives
+# end
+
+# # Example usage:
+# # dataset = [x̂, time]
+# derivatives = calculate_derivatives(dataset)
+# dataset[1]
+# # Access derivative values at specific time points as needed.
+
+# # # 9,0.5
+# # 0.09894916260292887
+# # 0.09870335436072103
+# # 0.08398556878067913
+# # 0.10109070099105527
+# # 0.09122683737517055
+# # 0.08614958011892977
+# # mean(abs.(x̂ .- meanscurve1)) #0.017112298305523976
+# # mean(abs.(physsol1 .- meanscurve1)) #0.004038636894341354
+# # # 9,4(little worse)
+# # mean(abs.(x̂ .- meanscurve1))#0.01800876370000113
+# # mean(abs.(physsol1 .- meanscurve1))#0.007285681280600875
+# # # 30,30
+# # mean(abs.(x̂ .- meanscurve1)) #0.10599926120358046
+# # mean(abs.(physsol1 .- meanscurve1)) #0.10375554193397989
+# # # 30,0.5
+# # mean(abs.(x̂ .- meanscurve1)) #0.10160824458252521
+# # mean(abs.(physsol1 .- meanscurve1)) #0.09999942538357891
+
+# # # ------------------------------------------------normale
+# # # 9,0.5
+# # mean(abs.(x̂ .- meanscurve1)) #0.0333356493928835
+# # mean(abs.(physsol1 .- meanscurve1)) #0.02721733876400459
+# # # 9,4(little worse)
+# # mean(abs.(x̂ .- meanscurve1)) #0.020734206709433347
+# # mean(abs.(physsol1 .- meanscurve1)) #0.012502850740700212
+# # # 30,30
+# # mean(abs.(x̂ .- meanscurve1)) #0.10615859683094729
+# # mean(abs.(physsol1 .- meanscurve1)) #0.10508141153722575
+# # # 30,0.5
+# # mean(abs.(x̂ .- meanscurve1)) #0.10833514946031565
+# # mean(abs.(physsol1 .- meanscurve1)) #0.10668470203219232
+
+# # # 9,0.5
+# # 10.158108285475553
+# # 10.207234384538026
+# # 10.215000657664852
+# # 10.213817644016174
+# # 13.380030074088719
+# # 13.348906350967326
+
+# # 6.952731422892041
+
+# # # All losses
+# # 10.161478523326277
+# # # L2 losses 1
+# # 9.33312996960278
+# # # L2 losses 2
+# # 10.217417241370631
+
+# # mean([fhsamples1[i][26] for i in 500:1000]) #6.245045767509431
+# # p #6.283185307179586
+# # # 9,4
+# # mean([fhsamples1[i][23] for i in 500:1000]) #6.212522300650451
+# # # 30,30
+# # mean([fhsamples1[i][23] for i in 500:1000]) #35.328636809737695
+# # # 30,0.5
+# # mean([fhsamples1[i][23] for i in 500:1000]) #35.232963812125654
+
+# # # ---------------------------------------normale
+# # # 9,0.5
+# # mean([fhsamples1[i][23] for i in 500:1000]) #6.547771572198114
+# # p #6.283185307179586
+# # # 9,4
+# # mean([fhsamples1[i][23] for i in 500:1000]) #6.158906185002702
+# # # 30,30
+# # mean([fhsamples1[i][23] for i in 500:1000]) #29.210400972620185
+# # # 30,0.5
+# # mean([fhsamples1[i][23] for i in 500:1000]) #29.153845019454522
+
+# # # ----------------more dataset normale -----------------------------
+# # # 9,0.5
+# # mean([fhsamples1[i][23] for i in 500:1000]) #6.271141178216537
+# # p #6.283185307179586
+# # # 9,4
+# # mean([fhsamples1[i][23] for i in 500:1000]) #6.241144692919369
+# # # 30,30
+# # mean([fhsamples1[i][23] for i in 500:1000]) #29.124480447973127
+# # # 30,0.5
+# # mean([fhsamples1[i][23] for i in 500:1000]) #29.07838011629903
+
+# # # 9,0.5
+# # mean(abs.(x̂ .- meanscurve1)) #0.016551602015599295
+# # mean(abs.(physsol1 .- meanscurve1)) #0.0021488618484224245
+# # # 9,4(little worse)
+# # mean(abs.(x̂ .- meanscurve1)) #0.017022725082640747
+# # mean(abs.(physsol1 .- meanscurve1)) #0.004339761917100232
+# # # 30,30
+# # mean(abs.(x̂ .- meanscurve1)) #0.09668785317864312
+# # mean(abs.(physsol1 .- meanscurve1)) #0.09430712337543362
+# # # 30,0.5
+# # mean(abs.(x̂ .- meanscurve1)) #0.09958118358974392
+# # mean(abs.(physsol1 .- meanscurve1)) #0.09717454226368502
+
+# # # ----------------more dataset special -----------------------------
+# # # 9,0.5
+# # mean([fhsamples1[i][23] for i in 500:1000]) #6.284355334485365
+# # p #6.283185307179586
+# # # 9,4
+# # mean([fhsamples1[i][23] for i in 500:1000]) #6.259238106698602
+# # # 30,30
+# # mean([fhsamples1[i][23] for i in 500:1000]) #29.139808934336987
+# # # 30,0.5
+# # mean([fhsamples1[i][23] for i in 500:1000]) #29.03921327641226
+
+# # # 9,0.5
+# # mean(abs.(x̂ .- meanscurve1)) #0.016627231605546876
+# # mean(abs.(physsol1 .- meanscurve1)) #0.0020311429130039564
+# # # 9,4(little worse)
+# # mean(abs.(x̂ .- meanscurve1)) #0.016650324577507352
+# # mean(abs.(physsol1 .- meanscurve1)) #0.0027537543411154677
+# # # 30,30
+# # mean(abs.(x̂ .- meanscurve1)) #0.09713187937270151
+# # mean(abs.(physsol1 .- meanscurve1)) #0.09317278450371556
+# # # 30,0.5
+# # mean(abs.(x̂ .- meanscurve1)) #0.09550234866855814
+# # mean(abs.(physsol1 .- meanscurve1)) #0.09317278450371556
+
+# # using Plots, StatsPlots
+# # plotly()
+
+# # ---------------------------------------------------------
+# # # # Distribution abstract in wrapper, dataset Float64
+# # # 268.651 s (206393690 allocations: 388.71 GiB)
+# # # 318.170551 seconds (206.29 M allocations: 388.453 GiB, 20.83% gc time)
+
+# # # # Above with dataset Real subtype
+# # # 326.201 s (206327409 allocations: 388.42 GiB)
+# # # 363.189370 seconds (206.25 M allocations: 387.975 GiB, 15.77% gc time)
+# # # 306.171 s (206321277 allocations: 388.55 GiB)
+# # # 356.180699 seconds (206.43 M allocations: 388.361 GiB, 13.77% gc time)
+
+# # # # Above with dataset AbstractFloat subtype
+# # # 290.751187 seconds (205.94 M allocations: 387.955 GiB, 12.92% gc time)
+# # # 296.319815 seconds (206.38 M allocations: 388.730 GiB, 12.69% gc time)
+
+# # # # ODEProblem float64 dtaset and vector distri inside
+# # #   273.169 s (206128318 allocations: 388.40 GiB)
+# # #   274.059531 seconds (205.91 M allocations: 387.953 GiB, 12.77% gc time)
+
+# # # #   Dataset float64 inside and vector distri outsude
+# # #   333.603 s (206251143 allocations: 388.41 GiB)
+# # # 373.377222 seconds (206.11 M allocations: 387.968 GiB, 13.25% gc time)
+# # #   359.745 s (206348301 allocations: 388.41 GiB)
+# # # 357.813114 seconds (206.31 M allocations: 388.354 GiB, 13.54% gc time)
+
+# # # # Dataset float64 inside and vector distri inside
+# # #   326.437 s (206253571 allocations: 388.41 GiB)
+# # #   290.334083 seconds (205.92 M allocations: 387.954 GiB, 13.82% gc time)
+
+# # # # current setting
+# # # 451.304 s (206476927 allocations: 388.43 GiB)
+# # # 384.532732 seconds (206.22 M allocations: 387.976 GiB, 13.17% gc time)
+# # # 310.223 s (206332558 allocations: 388.63 GiB)
+# # # 344.243889 seconds (206.34 M allocations: 388.409 GiB, 13.84% gc time)
+# # # 357.457737 seconds (206.66 M allocations: 389.064 GiB, 18.16% gc time)
+
+# # # # shit setup
+# # #   325.595 s (206283732 allocations: 388.41 GiB)
+# # # 334.248753 seconds (206.06 M allocations: 387.964 GiB, 12.60% gc time)
+# # #   326.011 s (206370857 allocations: 388.56 GiB)
+# # # 327.203339 seconds (206.29 M allocations: 388.405 GiB, 12.92% gc time)
+
+# # # # in wrapper Distribution prior, insiade FLOAT64 DATASET
+# # # 325.158167 seconds (205.97 M allocations: 387.958 GiB, 15.07% gc time) 
+# # #   429.536 s (206476324 allocations: 388.43 GiB)
+# # #   527.364 s (206740343 allocations: 388.58 GiB)
+
+# # # #   wrapper Distribtuion, inside Float64
+# # # 326.017 s (206037971 allocations: 387.96 GiB)
+# # # 347.424730 seconds (206.45 M allocations: 388.532 GiB, 12.92% gc time)
+
+# # # 439.047568 seconds (284.24 M allocations: 392.598 GiB, 15.25% gc time, 14.36% compilation time: 0% of which was recompilation)
+# # # 375.472142 seconds (206.40 M allocations: 388.529 GiB, 14.93% gc time)
+# # # 374.888820 seconds (206.34 M allocations: 388.346 GiB, 14.09% gc time)
+# # # 363.719611 seconds (206.39 M allocations: 388.581 GiB, 15.08% gc time)
+# # # # inside Distribtion, instide Float64
+# # #   310.238 s (206324249 allocations: 388.53 GiB)
+# # #   308.991494 seconds (206.34 M allocations: 388.549 GiB, 14.01% gc time)
+# # #   337.442 s (206280712 allocations: 388.36 GiB)
+# # #   299.983096 seconds (206.29 M allocations: 388.512 GiB, 17.14% gc time)
+
+# # #   394.924357 seconds (206.27 M allocations: 388.337 GiB, 23.68% gc time)
+# # # 438.204179 seconds (206.39 M allocations: 388.470 GiB, 23.84% gc time)
+# # #   376.626914 seconds (206.46 M allocations: 388.693 GiB, 18.72% gc time)
+# # # 286.863795 seconds (206.14 M allocations: 388.370 GiB, 18.80% gc time)
+# # #   285.556929 seconds (206.22 M allocations: 388.371 GiB, 17.04% gc time)
+# # #   291.471662 seconds (205.96 M allocations: 388.068 GiB, 19.85% gc time)
+
+# # # 495.814341 seconds (284.62 M allocations: 392.622 GiB, 12.56% gc time, 10.96% compilation time: 0% of which was recompilation)
+# # # 361.530617 seconds (206.36 M allocations: 388.526 GiB, 14.98% gc time)
+# # # 348.576065 seconds (206.22 M allocations: 388.337 GiB, 15.01% gc time)
+# # # 374.575609 seconds (206.45 M allocations: 388.586 GiB, 14.65% gc time)
+# # # 314.223008 seconds (206.23 M allocations: 388.411 GiB, 14.63% gc time)
+
+# # PROBLEM-3 LOTKA VOLTERRA EXAMPLE [WIP] (WITH PARAMETER ESTIMATION)(will be put in tutorial page)
+# function lotka_volterra(u, p, t)
+#     # Model parameters.
+#     α, β, γ, δ = p
+#     # Current state.
+#     x, y = u
+
+#     # Evaluate differential equations.
+#     dx = (α - β * y) * x # prey
+#     dy = (δ * x - γ) * y # predator
+
+#     return [dx, dy]
+# end
+
+# u0 = [1.0, 1.0]
+# p = [1.5, 1.0, 3.0, 1.0]
+# tspan = (0.0, 6.0)
+# prob = ODEProblem(lotka_volterra, u0, tspan, p)
+# solution = solve(prob, Tsit5(); saveat = 0.05)
+
+# as = reduce(hcat, solution.u)
+# as[1, :]
+# # Plot simulation.
+# time = solution.t
+# u = hcat(solution.u...)
+# # BPINN AND TRAINING DATASET CREATION, NN create, Reconstruct
+# x = u[1, :] + 0.5 * randn(length(u[1, :]))
+# y = u[2, :] + 0.5 * randn(length(u[1, :]))
+# dataset = [x[1:50], y[1:50], time[1:50]]
+# # scatter!(time, [x, y])
+# # scatter!(dataset[3], [dataset[2], dataset[1]])
+
+# # NN has 2 outputs as u -> [dx,dy]
+# chainlux1 = Lux.Chain(Lux.Dense(1, 6, Lux.tanh), Lux.Dense(6, 6, Lux.tanh),
+#     Lux.Dense(6, 2))
+# chainflux1 = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 6, tanh), Flux.Dense(6, 2))
+
+# # fh_mcmc_chainflux1, fhsamplesflux1, fhstatsflux1 = ahmc_bayesian_pinn_ode(prob, chainflux1,
+# #                                                                           dataset = dataset,
+# #                                                                           draw_samples = 1000,
+# #                                                                           l2std = [
+# #                                                                               0.05,
+# #                                                                               0.05,
+# #                                                                           ],
+# #                                                                           phystd = [
+# #                                                                               0.05,
+# #                                                                               0.05,
+# #                                                                           ],
+# #                                                                           priorsNNw = (0.0,
+# #          
+
+# #   3.0))
+
+# # check if NN output is more than 1
+# # numoutput = size(luxar[1])[1]
+# # if numoutput > 1
+# #     # Initialize a vector to store the separated outputs for each output dimension
+# #     output_matrices = [Vector{Vector{Float32}}() for _ in 1:numoutput]
+
+# #     # Loop through each element in the `as` vector
+# #     for element in as
+# #         for i in 1:numoutput
+# #             push!(output_matrices[i], element[i, :])  # Append the i-th output (i-th row) to the i-th output_matrices
+# #         end
+# #     end
+
+# #     ensemblecurves = Vector{}[]
+# #     for r in 1:numoutput
+# #         br = hcat(output_matrices[r]...)'
+# #         ensemblecurve = prob.u0[r] .+
+# #                         [Particles(br[:, i]) for i in 1:length(t)] .*
+# #                         (t .- prob.tspan[1])
+# #         push!(ensemblecurves, ensemblecurve)
+# #     end
+
+# # else
+# #     # ensemblecurve = prob.u0 .+
+# #     #                 [Particles(reduce(vcat, luxar)[:, i]) for i in 1:length(t)] .*
+# #     #                 (t .- prob.tspan[1])
+# #     print("yuh")
+# # end
+
+# # fhsamplesflux2
+# # nnparams = length(init1)
+# # estimnnparams = [Particles(reduce(hcat, fhsamplesflux2)[i, :]) for i in 1:nnparams]
+# # ninv=4
+# # estimated_params = [Particles(reduce(hcat, fhsamplesflux2[(end - ninv + 1):end])[i, :])
+# #                     for i in (nnparams + 1):(nnparams + ninv)]
+# # output_matrices[r]
+# # br = hcat(output_matrices[r]...)'
+
+# # br[:, 1]
+
+# # [Particles(br[:, i]) for i in 1:length(t)]
+# # prob.u0
+# # [Particles(br[:, i]) for i in 1:length(t)] .*
+# # (t .- prob.tspan[1])
+
+# # ensemblecurve = prob.u0[r] .+
+# #                 [Particles(br[:, i]) for i in 1:length(t)] .*
+# #                 (t .- prob.tspan[1])
+# # push!(ensemblecurves, ensemblecurve)
+
+# using StatsPlots
+# plotly()
+# plot(t, ensemblecurve)
+# plot(t, ensemblecurves[1])
+# plot!(t, ensemblecurves[2])
+# ensemblecurve
+# ensemblecurves[1]
+# fh_mcmc_chainflux2, fhsamplesflux2, fhstatsflux2 = ahmc_bayesian_pinn_ode(prob, chainflux1,
+#     dataset = dataset,
+#     draw_samples = 1000,
+#     l2std = [
+#         0.05,
+#         0.05,
+#     ],
+#     phystd = [
+#         0.05,
+#         0.05,
+#     ],
+#     priorsNNw = (0.0,
+#         3.0),
+#     param = [
+#         Normal(1.5,
+#             0.5),
+#         Normal(1.2,
+#             0.5),
+#         Normal(3.3,
+#             0.5),
+#         Normal(1.4,
+#             0.5),
+#     ], progress = true)
+
+# alg = NeuralPDE.BNNODE(chainflux1,
+#     dataset = dataset,
+#     draw_samples = 1000,
+#     l2std = [
+#         0.05,
+#         0.05,
+#     ],
+#     phystd = [
+#         0.05,
+#         0.05,
+#     ],
+#     priorsNNw = (0.0,
+#         3.0),
+#     param = [
+#         Normal(4.5,
+#             5),
+#         Normal(7,
+#             2),
+#         Normal(5,
+#             2),
+#         Normal(-4,
+#             6),
+#     ],
+#     n_leapfrog = 30, progress = true)
+
+# sol3flux_pestim = solve(prob, alg)
+
+# # OG PARAM VALUES
+# [1.5, 1.0, 3.0, 1.0]
+# # less
+# # [1.34, 7.51, 2.54, -2.55]
+# # better
+# # [1.48, 0.993, 2.77, 0.954]
+
+# sol3flux_pestim.es
+# sol3flux_pestim.estimated_ode_params
+# # fh_mcmc_chainlux1, fhsampleslux1, fhstatslux1 = ahmc_bayesian_pinn_ode(prob, chainlux1,
+# #                                                                        dataset = dataset,
+# #                                                                        draw_samples = 1000,
+# #                                                                        l2std = [0.05, 0.05],
+# #                                                                        phystd = [
+# #                                                                            0.05,
+# #                                                                            0.05,
+# #                                                                        ],
+# #                                                                        priorsNNw = (0.0,
+# #                                                                                     3.0))
+
+# # fh_mcmc_chainlux2, fhsampleslux2, fhstatslux2 = ahmc_bayesian_pinn_ode(prob, chainlux1,
+# #                                                                        dataset = dataset,
+# #                                                                        draw_samples = 1000,
+# #                                                                        l2std = [0.05, 0.05],
+# #                                                                        phystd = [
+# #                                                                            0.05,
+# #                                                                            0.05,
+# #                                                                        ],
+# #                                                                        priorsNNw = (0.0,
+# #                                                                                     3.0),
+# #                                                                        param = [
+# #                                                                            Normal(1.5, 0.5),
+# #                                                                            Normal(1.2, 0.5),
+# #                                                                            Normal(3.3, 0.5),
+# #                                                                            Normal(1.4, 0.5),
+# #                                                                        ])
+
+# init1, re1 = destructure(chainflux1)
+# θinit, st = Lux.setup(Random.default_rng(), chainlux1)
+# #   PLOT testing points
+# t = time
+# p = prob.p
+# collect(Float64, vcat(ComponentArrays.ComponentArray(θinit)))
+# collect(Float64, ComponentArrays.ComponentArray(θinit))
+# # Mean of last 1000 sampled parameter's curves(flux and lux chains)[Ensemble predictions]
+# out = re1.([fhsamplesflux1[i][1:68] for i in 500:1000])
+# yu = [out[i](t') for i in eachindex(out)]
+
+# function getensemble(yu, num_models)
+#     num_rows, num_cols = size(yu[1])
+#     row_means = zeros(Float32, num_rows, num_cols)
+#     for i in 1:num_models
+#         row_means .+= yu[i]
+#     end
+#     row_means ./ num_models
+# end
+# fluxmean = getensemble(yu, length(out))
+# meanscurve1_1 = prob.u0 .+ (t' .- prob.tspan[1]) .* fluxmean
+# mean(abs.(u .- meanscurve1_1))
+
+# plot!(t, physsol1)
+# @test mean(abs2.(x̂ .- meanscurve1_1)) < 2e-2
+# @test mean(abs2.(physsol1 .- meanscurve1)) < 2e-2
+# @test mean(abs2.(x̂ .- meanscurve2)) < 3e-3
+# @test mean(abs2.(physsol1 .- meanscurve2)) < 2e-3
+
+# out = re1.([fhsamplesflux2[i][1:68] for i in 500:1000])
+# yu = collect(out[i](t') for i in eachindex(out))
+# fluxmean = getensemble(yu, length(out))
+# meanscurve1_2 = prob.u0 .+ (t' .- prob.tspan[1]) .* fluxmean
+# mean(abs.(u .- meanscurve1_2))
+
+# @test mean(abs2.(x̂ .- meanscurve1)) < 2e-2
+# @test mean(abs2.(physsol1 .- meanscurve1)) < 2e-2
+# @test mean(abs2.(x̂ .- meanscurve2)) < 3e-3
+# @test mean(abs2.(physsol1 .- meanscurve2)) < 2e-3
+
+# θ = [vector_to_parameters(fhsampleslux1[i][1:(end - 4)], θinit) for i in 500:1000]
+# luxar = [chainlux1(t', θ[i], st)[1] for i in 1:500]
+# luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
+# meanscurve2_1 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
+
+# @test mean(abs2.(x̂ .- meanscurve1)) < 2e-2
+# @test mean(abs2.(physsol1 .- meanscurve1)) < 2e-2
+# @test mean(abs2.(x̂ .- meanscurve2)) < 3e-3
+# @test mean(abs2.(physsol1 .- meanscurve2)) < 2e-3
+
+# θ = [vector_to_parameters(fhsampleslux2[i][1:(end - 4)], θinit) for i in 500:1000]
+# luxar = [chainlux1(t', θ[i], st)[1] for i in 1:500]
+# luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
+# meanscurve2_2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
+
+# @test mean(abs2.(x̂ .- meanscurve1)) < 2e-2
+# @test mean(abs2.(physsol1 .- meanscurve1)) < 2e-2
+# @test mean(abs2.(x̂ .- meanscurve2)) < 3e-3
+# @test mean(abs2.(physsol1 .- meanscurve2)) < 2e-3
+
+# # # ESTIMATED ODE PARAMETERS (NN1 AND NN2)
+# @test abs(p - mean([fhsamplesflux2[i][69] for i in 500:1000])) < 0.1 * p[1]
+# @test abs(p - mean([fhsampleslux2[i][69] for i in 500:1000])) < 0.2 * p[1]
+
+# # @test abs(p - mean([fhsamplesflux2[i][70] for i in 500:1000])) < 0.1 * p[2]
+# # @test abs(p - mean([fhsampleslux2[i][70] for i in 500:1000])) < 0.2 * p[2]
+
+# # @test abs(p - mean([fhsamplesflux2[i][71] for i in 500:1000])) < 0.1 * p[3]
+# # @test abs(p - mean([fhsampleslux2[i][71] for i in 500:1000])) < 0.2 * p[3]
+
+# # @test abs(p - mean([fhsamplesflux2[i][72] for i in 500:1000])) < 0.1 * p[4]
+# # @test abs(p - mean([fhsampleslux2[i][72] for i in 500:1000])) < 0.2 * p[4]
+
+# # fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chainlux1,
+# #                                                               dataset = dataset,
+# #                                                               draw_samples = 1000,
+# #                                                               l2std = [0.05, 0.05],
+# #                                                               phystd = [0.05, 0.05],
+# #                                                               priorsNNw = (0.0, 3.0),
+# #                                                               param = [
+# #                                                                   Normal(1.5, 0.5),
+# #                                                                   Normal(1.2, 0.5),
+# #                                                                   Normal(3.3, 0.5),
+# #                                                                   Normal(1.4, 0.5),
+# #                                                               ], autodiff = true)
+
+# # fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chainlux1,
+# #                                                               dataset = dataset,
+# #                                                               draw_samples = 1000,
+# #                                                               l2std = [0.05, 0.05],
+# #                                                               phystd = [0.05, 0.05],
+# #                                                               priorsNNw = (0.0, 3.0),
+# #                                                               param = [
+# #                                                                   Normal(1.5, 0.5),
+# #                                                                   Normal(1.2, 0.5),
+# #                                                                   Normal(3.3, 0.5),
+# #                                                                   Normal(1.4, 0.5),
+# #                                                               ], nchains = 2)
+
+# # NOTES (WILL CLEAR LATER)
+# # --------------------------------------------------------------------------------------------
+# # Hamiltonian energy must be lowest(more paramters the better is it to map onto them)
+# # full better than L2 and phy individual(test)
+# # in mergephys more points after training points is better from 20->40
+# # does consecutive runs bceome better? why?(plot 172)(same chain maybe)
+# # does density of points in timespan matter dataset vs internal timespan?(plot 172)(100+0.01)
+# # when training from 0-1 and phys from 1-5 with 1/150 simple nn slow,but bigger nn faster decrease in Hmailtonian
+# # bigger time interval more curves to adapt to only more parameters adapt to that, better NN architecture
+# # higher order logproblems solve better
+# # repl up up are same instances? but reexecute calls are new?
+
+# #Compare results against paper example
+# # Lux chains support (DONE)
+# # fix predictions for odes depending upon 1,p in f(u,p,t)(DONE)
+# # lotka volterra learn curve beyond l2 losses(L2 losses determine accuracy of parameters)(parameters cant run free ∴ L2 interval only)
+# # check if prameters estimation works(YES)
+# # lotka volterra parameters estimate (DONE)
+
+# using NeuralPDE, Lux, Flux, Optimization, OptimizationOptimJL
+# import ModelingToolkit: Interval
+# using Plots, StatsPlots
+# plotly()
+# # Profile.init()
+
+# @parameters x y
+# @variables u(..)
+# Dxx = Differential(x)^2
+# Dyy = Differential(y)^2
+
+# # 2D PDE
+# eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y)
+
+# # Boundary conditions
+# bcs = [u(0, y) ~ 0.0, u(1, y) ~ 0.0,
+#     u(x, 0) ~ 0.0, u(x, 1) ~ 0.0]
+# # Space and time domains
+# domains = [x ∈ Interval(0.0, 1.0),
+#     y ∈ Interval(0.0, 1.0)]
+
+# # Neural network
+# dim = 2 # number of dimensions
+# chain = Flux.Chain(Flux.Dense(dim, 16, Lux.σ), Flux.Dense(16, 16, Lux.σ), Flux.Dense(16, 1))
+# θ, re = destructure(chain)
+# # Discretization
+# dx = 0.05
+# discretization = PhysicsInformedNN(chain, GridTraining(dx))
+
+# @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
+
+# pinnrep = symbolic_discretize(pde_system, discretization)
+# typeof(pinnrep.phi)
+# typeof(pinnrep.phi)
+# typeof(re)
+# pinnrep.phi([1, 2], θ)
+
+# typeof(θ)
+
+# print(pinnrep)
+# pinnrep.eqs
+# pinnrep.bcs
+# pinnrep.domains
+# pinnrep.eq_params
+# pinnrep.defaults
+# print(pinnrep.default_p)
+# pinnrep.param_estim
+# print(pinnrep.additional_loss)
+# pinnrep.adaloss
+# pinnrep.depvars
+# pinnrep.indvars
+# pinnrep.dict_depvar_input
+# pinnrep.dict_depvars
+# pinnrep.dict_indvars
+# print(pinnrep.logger)
+# pinnrep.multioutput
+# pinnrep.iteration
+# pinnrep.init_params
+# pinnrep.flat_init_params
+# pinnrep.phi
+# pinnrep.derivative
+# pinnrep.strategy
+# pinnrep.pde_indvars
+# pinnrep.bc_indvars
+# pinnrep.pde_integration_vars
+# pinnrep.bc_integration_vars
+# pinnrep.integral
+# pinnrep.symbolic_pde_loss_functions
+# pinnrep.symbolic_bc_loss_functions
+# pinnrep.loss_functions
+
+# #  = discretize(pde_system, discretization)
+# prob = symbolic_discretize(pde_system, discretization)
+# # "The boundary condition loss functions"
+# sum([prob.loss_functions.bc_loss_functions[i](θ) for i in eachindex(1:4)])
+# sum([prob.loss_functions.pde_loss_functions[i](θ) for i in eachindex(1)])
+
+# prob.loss_functions.full_loss_function(θ, 32)
+
+# prob.loss_functions.bc_loss_functions[1](θ)
+
+# prob.loss_functions.bc_loss_functions
+# prob.loss_functions.full_loss_function
+# prob.loss_functions.additional_loss_function
+# prob.loss_functions.pde_loss_functions
+
+# 1.3953060473003345 + 1.378102161087438 + 1.395376727128639 + 1.3783868705075002 +
+# 0.22674532775196876
+# # "The PDE loss functions"
+# prob.loss_functions.pde_loss_functions
+# prob.loss_functions.pde_loss_functions[1](θ)
+# # "The full loss function, combining the PDE and boundary condition loss functions.This is the loss function that is used by the optimizer."
+# prob.loss_functions.full_loss_function(θ, nothing)
+# prob.loss_functions.full_loss_function(θ, 423423)
+
+# # "The wrapped `additional_loss`, as pieced together for the optimizer."
+# prob.loss_functions.additional_loss_function
+# # "The pre-data version of the PDE loss function"
+# prob.loss_functions.datafree_pde_loss_functions
+# # "The pre-data version of the BC loss function"
+# prob.loss_functions.datafree_bc_loss_functions
+
+# using Random
+# θ, st = Lux.setup(Random.default_rng(), chain)
+# #Optimizer
+# opt = OptimizationOptimJL.BFGS()
+
+# #Callback function
+# callback = function (p, l)
+#     println("Current loss is: $l")
+#     return false
+# end
+
+# res = Optimization.solve(prob, opt, callback = callback, maxiters = 1000)
+# phi = discretization.phi
+
+# # ------------------------------------------------
+# using NeuralPDE, Lux, ModelingToolkit, Optimization, OptimizationOptimJL, OrdinaryDiffEq,
+#       Plots
+# import ModelingToolkit: Interval, infimum, supremum
+# @parameters t, σ_, β, ρ
+# @variables x(..), y(..), z(..)
+# Dt = Differential(t)
+# eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
+#     Dt(y(t)) ~ x(t) * (ρ - z(t)) - y(t),
+#     Dt(z(t)) ~ x(t) * y(t) - β * z(t)]
+
+# bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
+# domains = [t ∈ Interval(0.0, 1.0)]
+# dt = 0.01
+
+# input_ = length(domains)
+# n = 8
+# chain1 = Lux.Chain(Lux.Dense(input_, n, Lux.σ), Lux.Dense(n, n, Lux.σ),
+#                    Lux.Dense(n, n, Lux.σ),
+#                    Lux.Dense(n, 1))
+# chain2 = Lux.Chain(Lux.Dense(input_, n, Lux.σ), Lux.Dense(n, n, Lux.σ),
+#                    Lux.Dense(n, n, Lux.σ),
+#                    Lux.Dense(n, 1))
+# chain3 = Lux.Chain(Lux.Dense(input_, n, Lux.σ), Lux.Dense(n, n, Lux.σ),
+#                    Lux.Dense(n, n, Lux.σ),
+#                    Lux.Dense(n, 1))
+
+# function lorenz!(du, u, p, t)
+#     du[1] = 10.0 * (u[2] - u[1])
+#     du[2] = u[1] * (28.0 - u[3]) - u[2]
+#     du[3] = u[1] * u[2] - (8 / 3) * u[3]
+# end
+
+# u0 = [1.0; 0.0; 0.0]
+# tspan = (0.0, 1.0)
+# prob = ODEProblem(lorenz!, u0, tspan)
+# sol = solve(prob, Tsit5(), dt = 0.1)
+# ts = [infimum(d.domain):dt:supremum(d.domain) for d in domains][1]
+# function getData(sol)
+#     data = []
+#     us = hcat(sol(ts).u...)
+#     ts_ = hcat(sol(ts).t...)
+#     return [us, ts_]
+# end
+# data = getData(sol)
+
+# (u_, t_) = data
+# len = length(data[2])
+
+# depvars = [:x, :y, :z]
+# function additional_loss(phi, θ, p)
+#     return sum(sum(abs2, phi[i](t_, θ[depvars[i]]) .- u_[[i], :]) / len for i in 1:1:3)
+# end
+
+# discretization = NeuralPDE.PhysicsInformedNN([chain1, chain2, chain3],
+#                                              NeuralPDE.GridTraining(dt),
+#                                              param_estim = false,
+#                                              additional_loss = additional_loss)
+# @named pde_system = PDESystem(eqs, bcs, domains, [t], [x(t), y(t), z(t)], [σ_, ρ, β],
+#                               defaults = Dict([p .=> 1.0 for p in [σ_, ρ, β]]))
+# prob = NeuralPDE.discretize(pde_system, discretization)
+# callback = function (p, l)
+#     println("Current loss is: $l")
+#     return false
+# end
+# res = Optimization.solve(prob, BFGS(); callback = callback, maxiters = 5000)
+# p_ = res.u[(end - 2):end] # p_ = [9.93, 28.002, 2.667]
+
+# minimizers = [res.u.depvar[depvars[i]] for i in 1:3]
+# ts = [infimum(d.domain):(dt / 10):supremum(d.domain) for d in domains][1]
+# u_predict = [[discretization.phi[i]([t], minimizers[i])[1] for t in ts] for i in 1:3]
+# plot(sol)
+# plot!(ts, u_predict, label = ["x(t)" "y(t)" "z(t)"])
+
+# discretization.multioutput
+# discretization.chain
+# discretization.strategy
+# discretization.init_params
+# discretization.phi
+# discretization.derivative
+# discretization.param_estim
+# discretization.additional_loss
+# discretization.adaptive_loss
+# discretization.logger
+# discretization.log_options
+# discretization.iteration
+# discretization.self_increment
+# discretization.multioutput
+# discretization.kwargs
+
+# struct BNNODE1{P <: Vector{<:Distribution}}
+#     chain::Any
+#     Kernel::Any
+#     draw_samples::UInt32
+#     priorsNNw::Tuple{Float64, Float64}
+#     param::P
+#     l2std::Vector{Float64}
+#     phystd::Vector{Float64}
+
+#     function BNNODE1(chain, Kernel; draw_samples = 2000, priorsNNw = (0.0, 3.0), param = [],
+#                      l2std = [0.05], phystd = [0.05])
+#         BNNODE1(chain, Kernel, draw_samples, priorsNNw, param, l2std, phystd)
+#     end
+# end
+
+# struct BNNODE3{C, K, P <: Union{Any, Vector{<:Distribution}}}
+#     chain::C
+#     Kernel::K
+#     draw_samples::UInt32
+#     priorsNNw::Tuple{Float64, Float64}
+#     param::P
+#     l2std::Vector{Float64}
+#     phystd::Vector{Float64}
+
+#     function BNNODE3(chain, Kernel; draw_samples = 2000, priorsNNw = (0.0, 3.0), param = [],
+#                      l2std = [0.05], phystd = [0.05])
+#         new{typeof(chain), typeof(Kernel), typeof(param)}(chain, Kernel, draw_samples,
+#                                                           priorsNNw, param, l2std, phystd)
+#     end
+# end
+# linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π)
+# linear = (u, p, t) -> cos(2 * π * t)
+# tspan = (0.0, 2.0)
+# u0 = 0.0
+# prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan)
+
+# ta = range(tspan[1], tspan[2], length = 300)
+# u = [linear_analytic(u0, nothing, ti) for ti in ta]
+# sol1 = solve(prob, Tsit5())
+
+# # BPINN AND TRAINING DATASET CREATION, NN create, Reconstruct
+# x̂ = collect(Float64, Array(u) + 0.02 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂[1:100], time[1:100]]
+
+# # Call BPINN, create chain
+# chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1))
+# chainlux = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1))
+# HMC
+# solve(prob, BNNODE(chainflux, HMC))
+# BNNODE1(chainflux, HMC, 2000)
+
+# draw_samples = 2000
+# priorsNNw = (0.0, 3.0)
+# param = []
+# l2std = [0.05]
+# phystd = [0.05]
+# @time BNNODE3(chainflux, HMC, draw_samples = 2000, priorsNNw = (0.0, 3.0),
+#               param = [nothing],
+#               l2std = [0.05], phystd = [0.05])
+# typeof(Nothing) <: Vector{<:Distribution}
+# Nothing <: Distribution
+# {UnionAll} <: Distribution
+# @time [Nothing]
+# typeof([Nothing])
+# @time [1]
+
+# function test1(sum; c = 23, d = 32)
+#     return sum + c + d
+# end
+# function test(a, b; c, d)
+#     return test1(a + b, c, d)
+# end
+
+# test(2, 2)
+
+# struct BNNODE3{C, K, P <: Union{Vector{Nothing}, Vector{<:Distribution}}}
+#     chain::C
+#     Kernel::K
+#     draw_samples::Int64
+#     priorsNNw::Tuple{Float64, Float64}
+#     param::P
+#     l2std::Vector{Float64}
+#     phystd::Vector{Float64}
+
+#     function BNNODE3(chain, Kernel; draw_samples,
+#                      priorsNNw, param = [nothing], l2std, phystd)
+#         new{typeof(chain), typeof(Kernel), typeof(param)}(chain,
+#                                                           Kernel,
+#                                                           draw_samples,
+#                                                           priorsNNw,
+#                                                           param, l2std,
+#                                                           phystd)
+#     end
+# end
+
+# function solve1(prob::DiffEqBase.AbstractODEProblem, alg::BNNODE3;
+#                 dataset = [nothing], dt = 1 / 20.0,
+#                 init_params = nothing, nchains = 1,
+#                 autodiff = false, Integrator = Leapfrog,
+#                 Adaptor = StanHMCAdaptor, targetacceptancerate = 0.8,
+#                 Metric = DiagEuclideanMetric, jitter_rate = 3.0,
+#                 tempering_rate = 3.0, max_depth = 10, Δ_max = 1000,
+#                 n_leapfrog = 10, δ = 0.65, λ = 0.3, progress = true,
+#                 verbose = false)
+#     chain = alg.chain
+#     l2std = alg.l2std
+#     phystd = alg.phystd
+#     priorsNNw = alg.priorsNNw
+#     Kernel = alg.Kernel
+#     draw_samples = alg.draw_samples
+
+#     param = alg.param == [nothing] ? [] : alg.param
+#     mcmcchain, samples, statistics = ahmc_bayesian_pinn_ode(prob, chain, dataset = dataset,
+#                                                             draw_samples = draw_samples,
+#                                                             init_params = init_params,
+#                                                             physdt = dt, l2std = l2std,
+#                                                             phystd = phystd,
+#                                                             priorsNNw = priorsNNw,
+#                                                             param = param,
+#                                                             nchains = nchains,
+#                                                             autodiff = autodiff,
+#                                                             Kernel = Kernel,
+#                                                             Integrator = Integrator,
+#                                                             Adaptor = Adaptor,
+#                                                             targetacceptancerate = targetacceptancerate,
+#                                                             Metric = Metric,
+#                                                             jitter_rate = jitter_rate,
+#                                                             tempering_rate = tempering_rate,
+#                                                             max_depth = max_depth,
+#                                                             Δ_max = Δ_max,
+#                                                             n_leapfrog = n_leapfrog, δ = δ,
+#                                                             λ = λ, progress = progress,
+#                                                             verbose = verbose)
+# end
+
+# linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π)
+# linear = (u, p, t) -> cos(2 * π * t)
+# tspan = (0.0, 2.0)
+# u0 = 0.0
+# prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan)
+
+# ta = range(tspan[1], tspan[2], length = 300)
+# u = [linear_analytic(u0, nothing, ti) for ti in ta]
+# # sol1 = solve(prob, Tsit5())
+
+# # BPINN AND TRAINING DATASET CREATION, NN create, Reconstruct
+# x̂ = collect(Float64, Array(u) + 0.02 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂[1:100], time[1:100]]
+
+# # Call BPINN, create chain
+# chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1))
+# chainlux = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1))
+# HMC
+
+# solve1(prob, a)
+# a = BNNODE3(chainflux, HMC, draw_samples = 2000,
+#             priorsNNw = (0.0, 3.0),
+#             l2std = [0.05], phystd = [0.05])
+
+# Define Lotka-Volterra model.
+function lotka_volterra1(u, p, t)
+    # Model parameters.
+    α, β, γ, δ = p
+    # Current state.
+    x, y = u
+
+    # Evaluate differential equations.
+    dx = (α - β * y) * x # prey
+    dy = (δ * x - γ) * y # predator
+
+    return [dx, dy]
+end
+
+u0 = [1.0, 1.0]
+p = [1.5, 1.0, 3.0, 1.0]
+tspan = (0.0, 6.0)
+prob = ODEProblem(lotka_volterra1, u0, tspan, p)
+solution = solve(prob, Tsit5(); saveat = 0.05)
+
+as = reduce(hcat, solution.u)
+as[1, :]
+# Plot simulation.
+time = solution.t
+u = hcat(solution.u...)
+# BPINN AND TRAINING DATASET CREATION, NN create, Reconstruct
+x = u[1, :] + 0.5 * randn(length(u[1, :]))
+y = u[2, :] + 0.5 * randn(length(u[1, :]))
+dataset = [x[1:50], y[1:50], time[1:50]]
+# scatter!(time, [x, y])
+# scatter!(dataset[3], [dataset[2], dataset[1]])
+
+# NN has 2 outputs as u -> [dx,dy]
+chainlux1 = Lux.Chain(Lux.Dense(1, 6, Lux.tanh), Lux.Dense(6, 6, Lux.tanh),
+    Lux.Dense(6, 2))
+chainflux1 = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 6, tanh), Flux.Dense(6, 2))
+
+fh_mcmc_chainflux1, fhsamplesflux1, fhstatsflux1 = ahmc_bayesian_pinn_ode(prob, chainflux1,
+    dataset = dataset,
+    draw_samples = 1000,
+    l2std = [
+        0.05,
+        0.05,
+    ],
+    phystd = [
+        0.05,
+        0.05,
+    ],
+    priorsNNw = (0.0, 3.0), progress = true)
+ahmc_bayesian_pinn_ode(prob, chainflux1,
+    dataset = dataset,
+    draw_samples = 1000,
+    l2std = [
+        0.05,
+        0.05,
+    ],
+    phystd = [
+        0.05,
+        0.05,
+    ],
+    priorsNNw = (0.0, 3.0), progress = true)
+
+#     2×171 Matrix{Float64}:
+#  -0.5  -0.518956  -0.529639  …  -1.00266  -1.01049
+#   2.0   1.97109    1.92747       0.42619   0.396335
+
+#     2-element Vector{Float64}:
+#  -119451.94949911036
+#  -128543.23714618056
+
+# alg = NeuralPDE.BNNODE(chainflux1,
+#     dataset = dataset,
+#     draw_samples = 1000,
+#     l2std = [
+#         0.05,
+#         0.05,
+#     ],
+#     phystd = [
+#         0.05,
+#         0.05,
+#     ],
+#     priorsNNw = (0.0,
+#         3.0),
+#     param = [
+#         Normal(4.5,
+#             5),
+#         Normal(7,
+#             2),
+#         Normal(5,
+#             2),
+#         Normal(-4,
+#             6),
+#     ],
+#     n_leapfrog = 30, progress = true)
+
+# sol3flux_pestim = solve(prob, alg)
+
+#  ----------------------------------------------
+# original paper implementation
+# 25 points 
+run1  #7.70593 Particles{Float64, 1}
+run2 #6.66347 Particles{Float64, 1} 
+run3 #6.84827 Particles{Float64, 1} 
+
+# 50 points 
+run1 #7.83577 Particles{Float64, 1}
+run2 #6.49477 Particles{Float64, 1}
+run3 #6.47421 Particles{Float64, 1}
+
+# 100 points 
+run1 #5.96604 Particles{Float64, 1}
+run2 #6.05432 Particles{Float64, 1}
+run3 #6.08856 Particles{Float64, 1}
+
+# Full likelihood(uses total variation regularized differentiation) 
+# 25 points 
+run1 #6.41722 Particles{Float64, 1}
+run2 #6.42782 Particles{Float64, 1}
+run3 #6.42782 Particles{Float64, 1}
+
+# 50 points
+run1 #5.71268 Particles{Float64, 1}
+run2 #5.74599 Particles{Float64, 1}
+run3 #5.74599 Particles{Float64, 1}
+
+# 100 points  
+run1 #6.59097 Particles{Float64, 1}
+run2 #6.62813 Particles{Float64, 1}
+run3 #6.62813 Particles{Float64, 1}
+
+using Plots, StatsPlots
+function lotka_volterra(u, p, t)
+    # Model parameters.
+    α, β, γ, δ = p
+    # Current state.
+    x, y = u
+
+    # Evaluate differential equations.
+    dx = (α - β * y) * x # prey
+    dy = (δ * x - γ) * y # predator
+
+    return [dx, dy]
+end
+
+# initial-value problem.
+u0 = [1.0, 1.0]
+p = [1.5, 1.0, 3.0, 1.0]
+tspan = (0.0, 6.0)
+prob = ODEProblem(lotka_volterra, u0, tspan, p)
+
+# Plot simulation.
+
+solution = solve(prob, Tsit5(); saveat = 0.05)
+plot(solve(prob, Tsit5()))
+
+# Dataset creation for parameter estimation
+time = solution.t
+u = hcat(solution.u...)
+x = u[1, :] + 0.5 * randn(length(u[1, :]))
+y = u[2, :] + 0.5 * randn(length(u[1, :]))
+dataset = [x, y, time]
+
+# Neural Networks must have 2 outputs as u -> [dx,dy] in function lotka_volterra()
+chainflux = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 6, tanh), Flux.Dense(6, 2)) |>
+            Flux.f64
+
+chainlux = Lux.Chain(Lux.Dense(1, 6, Lux.tanh), Lux.Dense(6, 6, Lux.tanh), Lux.Dense(6, 2))
+
+alg1 = NeuralPDE.BNNODE(chainflux,
+    dataset = dataset,
+    draw_samples = 1000,
+    l2std = [
+        0.01,
+        0.01,
+    ],
+    phystd = [
+        0.01,
+        0.01,
+    ],
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol_flux_pestim = solve(prob, alg1)
+
+# Dataset not needed as we are solving the equation with ideal parameters
+alg2 = NeuralPDE.BNNODE(chainlux,
+    draw_samples = 1000,
+    l2std = [
+        0.05,
+        0.05,
+    ],
+    phystd = [
+        0.05,
+        0.05,
+    ],
+    priorsNNw = (0.0,
+        3.0),
+    n_leapfrog = 30, progress = true)
+
+sol_lux = solve(prob, alg2)
+
+#testing timepoints must match keyword arg `saveat`` timepoints of solve() call
+t = collect(Float64, prob.tspan[1]:(1 / 50.0):prob.tspan[2])
+
+# plotting solution for x,y for chain_flux
+plot(t, sol_flux_pestim.ensemblesol[1])
+plot!(t, sol_flux_pestim.ensemblesol[2])
+
+plot(sol_flux_pestim.ens1mblesol[1])
+plot!(sol_flux_pestim.ensemblesol[2])
+
+# estimated ODE parameters by .estimated_ode_params, weights and biases by .estimated_nn_params
+sol_flux_pestim.estimated_nn_params
+sol_flux_pestim.estimated_ode_params
+
+# plotting solution for x,y for chain_lux
+plot(t, sol_lux.ensemblesol[1])
+plot!(t, sol_lux.ensemblesol[2])
+
+# estimated weights and biases by .estimated_nn_params for chain_lux
+sol_lux.estimated_nn_params
+
+# # ----------------------------------stats-----------------------------
+# #   ----------------------------
+# # -----------------------------
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -3118.0639515039957
+# Sampling 100%|███████████████████████████████| Time: 0:04:47 
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -3118.0639515039957
+# Sampling 100%|███████████████████████████████| Time: 0:03:38 
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -3118.0639515039957
+# Sampling 100%|███████████████████████████████| Time: 0:04:12 
+# #   --------------------------
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -6242.351071278482
+# Sampling 100%|███████████████████████████████| Time: 0:05:09 
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -6242.351071278482
+# Sampling 100%|███████████████████████████████| Time: 0:04:47 
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -6242.351071278482
+# Sampling 100%|███████████████████████████████| Time: 0:04:25 
+# #   --------------
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -12497.32657780532
+# Sampling 100%|███████████████████████████████| Time: 0:06:47 
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -12497.32657780532
+# Sampling 100%|███████████████████████████████| Time: 0:05:54
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -12497.32657780532
+# Sampling 100%|███████████████████████████████| Time: 0:05:46
+# # ------------------------
+# # -----------------------
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -882.2934218498742
+# L2loss2 Logpdf is : -3118.0639515039957
+# Sampling 100%|███████████████████████████████| Time: 0:04:06
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -882.2934218498742
+# L2loss2 Logpdf is : -3118.0639515039957
+# Sampling 100%|███████████████████████████████| Time: 0:03:32
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -882.2934218498742
+# L2loss2 Logpdf is : -3118.0639515039957
+# Sampling 100%|███████████████████████████████| Time: 0:03:01 
+# # --------------------------
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1411.1717435511828
+# L2loss2 Logpdf is : -6242.351071278482
+# Sampling 100%|███████████████████████████████| Time: 0:04:02
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1411.1717435511828
+# L2loss2 Logpdf is : -6242.351071278482
+# Sampling 100%|███████████████████████████████| Time: 0:04:08
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1411.1717435511828
+# L2loss2 Logpdf is : -6242.351071278482
+# Sampling 100%|███████████████████████████████| Time: 0:04:15
+# # ----------------------------
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -3240.067149411982
+# L2loss2 Logpdf is : -12497.32657780532
+# Sampling 100%|███████████████████████████████| Time: 0:05:37
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -3240.067149411982
+# L2loss2 Logpdf is : -12497.32657780532
+# Sampling 100%|███████████████████████████████| Time: 0:06:02
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -3240.067149411982
+# L2loss2 Logpdf is : -12497.32657780532
+# Sampling 100%|███████████████████████████████| Time: 0:06:13
+
+using NeuralPDE, Lux, ModelingToolkit, Optimization, OptimizationOptimJL
+import ModelingToolkit: Interval, infimum, supremum
+
+using NeuralPDE, Flux, OptimizationOptimisers
+
+function diffeq(u, p, t)
+    u1, u2 = u
+    return [u2, p[1] + p[2] * sin(u1) + p[3] * u2]
+end
+p = [5, -10, -1.7]
+u0 = [-1.0, 7.0]
+tspan = (0.0, 10.0)
+prob = ODEProblem(ODEFunction(diffeq), u0, tspan, p)
+
+chainnew = Flux.Chain(Flux.Dense(1, 8, tanh), Flux.Dense(8, 8, tanh), Flux.Dense(8, 2)) |>
+           Flux.f64
+
+opt = OptimizationOptimisers.Adam(0.1)
+opt = Optimisers.ADAGrad(0.1)
+opt = Optimisers.AdaMax(0.01)
+algnew = NeuralPDE.NNODE(chainnew, opt)
+solution_new = solve(prob, algnew, verbose = true,
+    abstol = 1e-10, maxiters = 7000)
+u = reduce(hcat, solution_new.u)
+plot(solution_new.t, u[1, :])
+plot!(solution_new.t, u[2, :])
+
+algnew = NeuralPDE.BNNODE(chainnew, draw_samples = 200,
+    n_leapfrog = 30, progress = true)
+solution_new = solve(prob, algnew)
+
+@parameters t
+@variables u1(..), u2(..)
+D = Differential(t)
+eq = [D(u1(t)) ~ u2(t),
+    D(u2(t)) ~ 5 - 10 * sin(u1(t)) - 1.7 * u2(t)];
+
+import ModelingToolkit: Interval
+bcs = [u1(0) ~ -1, u2(0) ~ 7]
+domains = [t ∈ Interval(0.0, 10.0)]
+dt = 0.01
+
+input_ = length(domains) # number of dimensions
+n = 16
+chain = [Lux.Chain(Lux.Dense(input_, n, Lux.σ), Lux.Dense(n, n, Lux.σ), Lux.Dense(n, 1))
+         for _ in 1:2]
+
+@named pde_system = PDESystem(eq, bcs, domains, [t], [u1(t), u2(t)])
+
+strategy = NeuralPDE.GridTraining(dt)
+discretization = PhysicsInformedNN(chain, strategy)
+sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization)
+
+pde_loss_functions = sym_prob.loss_functions.pde_loss_functions
+bc_loss_functions = sym_prob.loss_functions.bc_loss_functions
+
+callback = function (p, l)
+    println("loss: ", l)
+    # println("pde_losses: ", map(l_ -> l_(p), pde_loss_functions))
+    # println("bcs_losses: ", map(l_ -> l_(p), bc_loss_functions))
+    return false
+end
+
+loss_functions = [pde_loss_functions; bc_loss_functions]
+
+function loss_function(θ, p)
+    sum(map(l -> l(θ), loss_functions))
+end
+
+f_ = OptimizationFunction(loss_function, Optimization.AutoZygote())
+prob = Optimization.OptimizationProblem(f_, sym_prob.flat_init_params)
+
+res = Optimization.solve(prob,
+    OptimizationOptimJL.BFGS();
+    callback = callback,
+    maxiters = 1000)
+phi = discretization.phi
\ No newline at end of file
diff --git a/test/BPINN_tests.jl b/test/BPINN_tests.jl
deleted file mode 100644
index e69de29bb2..0000000000

From e71f4cceb61e856f40308dffd4ae320575faa447 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Sat, 20 Jan 2024 23:53:20 +0530
Subject: [PATCH 061/107] removed new files

---
 src/BNNODE_new.jl     |  794 --------
 test/BPINN_newform.jl | 4354 -----------------------------------------
 2 files changed, 5148 deletions(-)
 delete mode 100644 src/BNNODE_new.jl
 delete mode 100644 test/BPINN_newform.jl

diff --git a/src/BNNODE_new.jl b/src/BNNODE_new.jl
deleted file mode 100644
index e6b1f24faa..0000000000
--- a/src/BNNODE_new.jl
+++ /dev/null
@@ -1,794 +0,0 @@
-mutable struct LogTargetDensity{C, S, ST <: AbstractTrainingStrategy, I,
-    P <: Vector{<:Distribution},
-    D <:
-    Union{Vector{Nothing}, Vector{<:Vector{<:AbstractFloat}}},
-}
-    dim::Int
-    prob::DiffEqBase.ODEProblem
-    chain::C
-    st::S
-    strategy::ST
-    dataset::D
-    priors::P
-    phystd::Vector{Float64}
-    l2std::Vector{Float64}
-    autodiff::Bool
-    physdt::Float64
-    extraparams::Int
-    init_params::I
-
-    function LogTargetDensity(dim, prob, chain::Optimisers.Restructure, st, strategy,
-        dataset,
-        priors, phystd, l2std, autodiff, physdt, extraparams,
-        init_params::AbstractVector)
-        new{
-            typeof(chain),
-            Nothing,
-            typeof(strategy),
-            typeof(init_params),
-            typeof(priors),
-            typeof(dataset),
-        }(dim,
-            prob,
-            chain,
-            nothing, strategy,
-            dataset,
-            priors,
-            phystd,
-            l2std,
-            autodiff,
-            physdt,
-            extraparams,
-            init_params)
-    end
-    function LogTargetDensity(dim, prob, chain::Lux.AbstractExplicitLayer, st, strategy,
-        dataset,
-        priors, phystd, l2std, autodiff, physdt, extraparams,
-        init_params::NamedTuple)
-        new{
-            typeof(chain),
-            typeof(st),
-            typeof(strategy),
-            typeof(init_params),
-            typeof(priors),
-            typeof(dataset),
-        }(dim,
-            prob,
-            chain, st, strategy,
-            dataset, priors,
-            phystd, l2std,
-            autodiff,
-            physdt,
-            extraparams,
-            init_params)
-    end
-end
-
-"""
-cool function to convert parameter's vector to ComponentArray of parameters (for Lux Chain: vector of samples -> Lux ComponentArrays)
-"""
-function vector_to_parameters(ps_new::AbstractVector, ps::NamedTuple)
-    @assert length(ps_new) == Lux.parameterlength(ps)
-    i = 1
-    function get_ps(x)
-        z = reshape(view(ps_new, i:(i + length(x) - 1)), size(x))
-        i += length(x)
-        return z
-    end
-    return Functors.fmap(get_ps, ps)
-end
-
-function LogDensityProblems.logdensity(Tar::LogTargetDensity, θ)
-    return physloglikelihood(Tar, θ) + priorweights(Tar, θ) + L2LossData(Tar, θ)
-    #  +  L2loss2(Tar, θ)
-end
-
-LogDensityProblems.dimension(Tar::LogTargetDensity) = Tar.dim
-
-function LogDensityProblems.capabilities(::LogTargetDensity)
-    LogDensityProblems.LogDensityOrder{1}()
-end
-
-# suggested extra loss function
-function L2loss2(Tar::LogTargetDensity, θ)
-    f = Tar.prob.f
-
-    # parameter estimation chosen or not
-    if Tar.extraparams > 0
-        dataset, deri_sol = Tar.dataset
-        # deri_sol = deri_sol'
-        autodiff = Tar.autodiff
-
-        # # Timepoints to enforce Physics
-        # dataset = Array(reduce(hcat, dataset)')
-        # t = dataset[end, :]
-        # û = dataset[1:(end - 1), :]
-
-        # ode_params = Tar.extraparams == 1 ?
-        #              θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
-        #              θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
-
-        # if length(û[:, 1]) == 1
-        #     physsol = [f(û[:, i][1],
-        #         ode_params,
-        #         t[i])
-        #                for i in 1:length(û[1, :])]
-        # else
-        #     physsol = [f(û[:, i],
-        #         ode_params,
-        #         t[i])
-        #                for i in 1:length(û[1, :])]
-        # end
-        # #form of NN output matrix output dim x n
-        # deri_physsol = reduce(hcat, physsol)
-
-        # > for perfect deriv(basically gradient matching in case of an ODEFunction)
-        # in case of PDE or general ODE we would want to reduce residue of f(du,u,p,t)
-        # if length(û[:, 1]) == 1
-        #     deri_sol = [f(û[:, i][1],
-        #         Tar.prob.p,
-        #         t[i])
-        #                 for i in 1:length(û[1, :])]
-        # else
-        #     deri_sol = [f(û[:, i],
-        #         Tar.prob.p,
-        #         t[i])
-        #                 for i in 1:length(û[1, :])]
-        # end
-        # deri_sol = reduce(hcat, deri_sol) 
-        # deri_sol = reduce(hcat, derivatives)
-
-        # Timepoints to enforce Physics 
-        t = dataset[end]
-        u1 = dataset[2]
-        û = dataset[1]
-        # Tar(t, θ[1:(length(θ) - Tar.extraparams)])'
-        #  
-
-        nnsol = NNodederi(Tar, t, θ[1:(length(θ) - Tar.extraparams)], autodiff)
-
-        ode_params = Tar.extraparams == 1 ?
-                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
-                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
-
-        if length(Tar.prob.u0) == 1
-            physsol = [f(û[i],
-                ode_params,
-                t[i])
-                       for i in 1:length(û[:, 1])]
-        else
-            physsol = [f([û[i], u1[i]],
-                ode_params,
-                t[i])
-                       for i in 1:length(û[:, 1])]
-        end
-        #form of NN output matrix output dim x n 
-        deri_physsol = reduce(hcat, physsol)
-
-        # if length(Tar.prob.u0) == 1
-        #     nnsol = [f(û[i],
-        #         Tar.prob.p,
-        #         t[i])
-        #              for i in 1:length(û[:, 1])]
-        # else
-        #     nnsol = [f([û[i], u1[i]],
-        #         Tar.prob.p,
-        #         t[i])
-        #              for i in 1:length(û[:, 1])]
-        # end
-        # form of NN output matrix output dim x n
-        # nnsol = reduce(hcat, nnsol)
-
-        # > Instead of dataset gradients trying NN derivatives with dataset collocation 
-        # # convert to matrix as nnsol  
-
-        physlogprob = 0
-        for i in 1:length(Tar.prob.u0)
-            # can add phystd[i] for u[i] 
-            physlogprob += logpdf(MvNormal(deri_physsol[i, :],
-                    LinearAlgebra.Diagonal(map(abs2,
-                        (Tar.l2std[i] * 4.0) .*
-                        ones(length(nnsol[i, :]))))),
-                nnsol[i, :])
-        end
-        return physlogprob
-    else
-        return 0
-    end
-end
-
-# PDE(DU,U,P,T)=0
-
-# Derivated via Central Diff
-# function calculate_derivatives2(dataset)
-#     x̂, time = dataset
-#     num_points = length(x̂)
-#     # Initialize an array to store the derivative values.
-#     derivatives = similar(x̂)
-
-#     for i in 2:(num_points - 1)
-#         # Calculate the first-order derivative using central differences.
-#         Δt_forward = time[i + 1] - time[i]
-#         Δt_backward = time[i] - time[i - 1]
-
-#         derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
-
-#         derivatives[i] = derivative
-#     end
-
-#     # Derivatives at the endpoints can be calculated using forward or backward differences.
-#     derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
-#     derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
-#     return derivatives
-# end
-
-function calderivatives(prob, dataset)
-    chainflux = Flux.Chain(Flux.Dense(1, 8, tanh), Flux.Dense(8, 8, tanh),
-        Flux.Dense(8, 2)) |> Flux.f64
-    # chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64
-    function loss(x, y)
-        # sum(Flux.mse.(prob.u0[1] .+ (prob.tspan[2] .- x)' .* chainflux(x)[1, :], y[1]) +
-        #     Flux.mse.(prob.u0[2] .+ (prob.tspan[2] .- x)' .* chainflux(x)[2, :], y[2]))
-        # sum(Flux.mse.(prob.u0[1] .+ (prob.tspan[2] .- x)' .* chainflux(x)[1, :], y[1]))
-        sum(Flux.mse.(chainflux(x), y))
-    end
-    optimizer = Flux.Optimise.ADAM(0.01)
-    epochs = 3000
-    for epoch in 1:epochs
-        Flux.train!(loss,
-            Flux.params(chainflux),
-            [(dataset[end]', dataset[1:(end - 1)])],
-            optimizer)
-    end
-
-    # A1 = (prob.u0' .+
-    #   (prob.tspan[2] .- (dataset[end]' .+ sqrt(eps(eltype(Float64)))))' .*
-    #   chainflux(dataset[end]' .+ sqrt(eps(eltype(Float64))))')
-
-    # A2 = (prob.u0' .+
-    #       (prob.tspan[2] .- (dataset[end]'))' .*
-    #       chainflux(dataset[end]')')
-
-    A1 = chainflux(dataset[end]' .+ sqrt(eps(eltype(dataset[end][1]))))
-    A2 = chainflux(dataset[end]')
-
-    gradients = (A2 .- A1) ./ sqrt(eps(eltype(dataset[end][1])))
-
-    return gradients
-end
-
-function calculate_derivatives(dataset)
-
-    # u = dataset[1]
-    # u1 = dataset[2]
-    # t = dataset[end]
-    # # control points
-    # n = Int(floor(length(t) / 10))
-    # # spline for datasetvalues(solution) 
-    # # interp = BSplineApprox(u, t, 4, 10, :Uniform, :Uniform)
-    # interp = CubicSpline(u, t)
-    # interp1 = CubicSpline(u1, t)
-    # # derrivatives interpolation
-    # dx = t[2] - t[1]
-    # time = collect(t[1]:dx:t[end])
-    # smoothu = [interp(i) for i in time]
-    # smoothu1 = [interp1(i) for i in time]
-    # # derivative of the spline (must match function derivative) 
-    # û = tvdiff(smoothu, 20, 0.5, dx = dx, ε = 1)
-    # û1 = tvdiff(smoothu1, 20, 0.5, dx = dx, ε = 1)
-    # # tvdiff(smoothu, 100, 0.035, dx = dx, ε = 1)
-    # # FDM
-    # # û1 = diff(u) / dx
-    # # dataset[1] and smoothu are almost equal(rounding errors)
-    # return [û, û1] 
-
-end
-
-"""
-L2 loss loglikelihood(needed for ODE parameter estimation)
-"""
-function L2LossData(Tar::LogTargetDensity, θ)
-    dataset = Tar.dataset
-    # check if dataset is provided
-    if dataset isa Vector{Nothing} || Tar.extraparams == 0
-        return 0
-    else
-        # matrix(each row corresponds to vector u's rows)
-        nn = Tar(dataset[end], θ[1:(length(θ) - Tar.extraparams)])
-
-        L2logprob = 0
-        for i in 1:length(Tar.prob.u0)
-            # for u[i] ith vector must be added to dataset,nn[1,:] is the dx in lotka_volterra
-            L2logprob += logpdf(MvNormal(nn[i, :],
-                    LinearAlgebra.Diagonal(map(abs2,
-                        (Tar.l2std[i] * 0.5) .*
-                        ones(length(dataset[i]))))),
-                dataset[i])
-        end
-        return L2logprob
-    end
-end
-
-"""
-physics loglikelihood over problem timespan + dataset timepoints
-"""
-function physloglikelihood(Tar::LogTargetDensity, θ)
-    f = Tar.prob.f
-    p = Tar.prob.p
-    tspan = Tar.prob.tspan
-    autodiff = Tar.autodiff
-    strategy = Tar.strategy
-
-    # parameter estimation chosen or not
-    if Tar.extraparams > 0
-        ode_params = Tar.extraparams == 1 ?
-                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
-                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
-    else
-        ode_params = p == SciMLBase.NullParameters() ? [] : p
-    end
-
-    return getlogpdf(strategy, Tar, f, autodiff, tspan, ode_params, θ)
-end
-
-function getlogpdf(strategy::GridTraining, Tar::LogTargetDensity, f, autodiff::Bool,
-    tspan,
-    ode_params, θ)
-    if Tar.dataset isa Vector{Nothing}
-        t = collect(eltype(strategy.dx), tspan[1]:(strategy.dx):tspan[2])
-    else
-        t = vcat(collect(eltype(strategy.dx), tspan[1]:(strategy.dx):tspan[2]),
-            Tar.dataset[end])
-    end
-
-    sum(innerdiff(Tar, f, autodiff, t, θ,
-        ode_params))
-end
-
-function getlogpdf(strategy::StochasticTraining,
-    Tar::LogTargetDensity,
-    f,
-    autodiff::Bool,
-    tspan,
-    ode_params,
-    θ)
-    if Tar.dataset isa Vector{Nothing}
-        t = [(tspan[2] - tspan[1]) * rand() + tspan[1] for i in 1:(strategy.points)]
-    else
-        t = vcat([(tspan[2] - tspan[1]) * rand() + tspan[1] for i in 1:(strategy.points)],
-            Tar.dataset[end])
-    end
-
-    sum(innerdiff(Tar, f, autodiff, t, θ,
-        ode_params))
-end
-
-function getlogpdf(strategy::QuadratureTraining, Tar::LogTargetDensity, f,
-    autodiff::Bool,
-    tspan,
-    ode_params, θ)
-    function integrand(t::Number, θ)
-        innerdiff(Tar, f, autodiff, [t], θ, ode_params)
-    end
-    intprob = IntegralProblem(integrand, tspan[1], tspan[2], θ; nout = length(Tar.prob.u0))
-    # add dataset logpdf?
-    sol = solve(intprob, QuadGKJL(); abstol = strategy.abstol, reltol = strategy.reltol)
-    sum(sol.u)
-end
-
-function getlogpdf(strategy::WeightedIntervalTraining, Tar::LogTargetDensity, f,
-    autodiff::Bool,
-    tspan,
-    ode_params, θ)
-    minT = tspan[1]
-    maxT = tspan[2]
-
-    weights = strategy.weights ./ sum(strategy.weights)
-
-    N = length(weights)
-    points = strategy.points
-
-    difference = (maxT - minT) / N
-
-    data = Float64[]
-    for (index, item) in enumerate(weights)
-        temp_data = rand(1, trunc(Int, points * item)) .* difference .+ minT .+
-                    ((index - 1) * difference)
-        data = append!(data, temp_data)
-    end
-
-    if Tar.dataset isa Vector{Nothing}
-        t = data
-    else
-        t = vcat(data,
-            Tar.dataset[end])
-    end
-
-    sum(innerdiff(Tar, f, autodiff, t, θ,
-        ode_params))
-end
-
-"""
-MvNormal likelihood at each `ti` in time `t` for ODE collocation residue with NN with parameters θ 
-"""
-function innerdiff(Tar::LogTargetDensity, f, autodiff::Bool, t::AbstractVector, θ,
-    ode_params)
-
-    # Tar used for phi and LogTargetDensity object attributes access
-    out = Tar(t, θ[1:(length(θ) - Tar.extraparams)])
-
-    # # reject samples case(write clear reason why)
-    if any(isinf, out[:, 1]) || any(isinf, ode_params)
-        return -Inf
-    end
-
-    # this is a vector{vector{dx,dy}}(handle case single u(float passed))
-    if length(out[:, 1]) == 1
-        physsol = [f(out[:, i][1],
-            ode_params,
-            t[i])
-                   for i in 1:length(out[1, :])]
-    else
-        physsol = [f(out[:, i],
-            ode_params,
-            t[i])
-                   for i in 1:length(out[1, :])]
-    end
-    physsol = reduce(hcat, physsol)
-
-    nnsol = NNodederi(Tar, t, θ[1:(length(θ) - Tar.extraparams)], autodiff)
-
-    vals = nnsol .- physsol
-
-    # N dimensional vector if N outputs for NN(each row has logpdf of i[i] where u is vector of dependant variables)
-    return [logpdf(MvNormal(vals[i, :],
-            LinearAlgebra.Diagonal(map(abs2,
-                Tar.phystd[i] .*
-                ones(length(vals[i, :]))))),
-        zeros(length(vals[i, :]))) for i in 1:length(Tar.prob.u0)]
-end
-
-"""
-prior logpdf for NN parameters + ODE constants
-"""
-function priorweights(Tar::LogTargetDensity, θ)
-    allparams = Tar.priors
-    # nn weights
-    nnwparams = allparams[1]
-
-    if Tar.extraparams > 0
-        # Vector of ode parameters priors
-        invpriors = allparams[2:end]
-
-        invlogpdf = sum(logpdf(invpriors[length(θ) - i + 1], θ[i])
-                        for i in (length(θ) - Tar.extraparams + 1):length(θ); init = 0.0)
-
-        return (invlogpdf
-                +
-                logpdf(nnwparams, θ[1:(length(θ) - Tar.extraparams)]))
-    else
-        return logpdf(nnwparams, θ)
-    end
-end
-
-function generate_Tar(chain::Lux.AbstractExplicitLayer, init_params)
-    θ, st = Lux.setup(Random.default_rng(), chain)
-    return init_params, chain, st
-end
-
-function generate_Tar(chain::Lux.AbstractExplicitLayer, init_params::Nothing)
-    θ, st = Lux.setup(Random.default_rng(), chain)
-    return θ, chain, st
-end
-
-function generate_Tar(chain::Flux.Chain, init_params)
-    θ, re = Flux.destructure(chain)
-    return init_params, re, nothing
-end
-
-function generate_Tar(chain::Flux.Chain, init_params::Nothing)
-    θ, re = Flux.destructure(chain)
-    # find_good_stepsize,phasepoint takes only float64
-    return θ, re, nothing
-end
-
-"""
-nn OUTPUT AT t,θ ~ phi(t,θ)
-"""
-function (f::LogTargetDensity{C, S})(t::AbstractVector,
-    θ) where {C <: Optimisers.Restructure, S}
-    f.prob.u0 .+ (t' .- f.prob.tspan[1]) .* f.chain(θ)(adapt(parameterless_type(θ), t'))
-end
-
-function (f::LogTargetDensity{C, S})(t::AbstractVector,
-    θ) where {C <: Lux.AbstractExplicitLayer, S}
-    θ = vector_to_parameters(θ, f.init_params)
-    y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), t'), θ, f.st)
-    ChainRulesCore.@ignore_derivatives f.st = st
-    f.prob.u0 .+ (t' .- f.prob.tspan[1]) .* y
-end
-
-function (f::LogTargetDensity{C, S})(t::Number,
-    θ) where {C <: Optimisers.Restructure, S}
-    #  must handle paired odes hence u0 broadcasted
-    f.prob.u0 .+ (t - f.prob.tspan[1]) * f.chain(θ)(adapt(parameterless_type(θ), [t]))
-end
-
-function (f::LogTargetDensity{C, S})(t::Number,
-    θ) where {C <: Lux.AbstractExplicitLayer, S}
-    θ = vector_to_parameters(θ, f.init_params)
-    y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), [t]), θ, f.st)
-    ChainRulesCore.@ignore_derivatives f.st = st
-    f.prob.u0 .+ (t .- f.prob.tspan[1]) .* y
-end
-
-"""
-similar to ode_dfdx() in NNODE/ode_solve.jl
-"""
-function NNodederi(phi::LogTargetDensity, t::AbstractVector, θ, autodiff::Bool)
-    if autodiff
-        hcat(ForwardDiff.derivative.(ti -> phi(ti, θ), t)...)
-    else
-        (phi(t .+ sqrt(eps(eltype(t))), θ) - phi(t, θ)) ./ sqrt(eps(eltype(t)))
-    end
-end
-
-function kernelchoice(Kernel, max_depth, Δ_max, n_leapfrog, δ, λ)
-    if Kernel == HMC
-        Kernel(n_leapfrog)
-    elseif Kernel == HMCDA
-        Kernel(δ, λ)
-    else
-        Kernel(δ, max_depth = max_depth, Δ_max = Δ_max)
-    end
-end
-
-function integratorchoice(Integrator, initial_ϵ, jitter_rate,
-    tempering_rate)
-    if Integrator == JitteredLeapfrog
-        Integrator(initial_ϵ, jitter_rate)
-    elseif Integrator == TemperedLeapfrog
-        Integrator(initial_ϵ, tempering_rate)
-    else
-        Integrator(initial_ϵ)
-    end
-end
-
-function adaptorchoice(Adaptor, mma, ssa)
-    if Adaptor != AdvancedHMC.NoAdaptation()
-        Adaptor(mma, ssa)
-    else
-        AdvancedHMC.NoAdaptation()
-    end
-end
-
-"""
-```julia
-ahmc_bayesian_pinn_ode(prob, chain; strategy = GridTraining,
-                    dataset = [nothing],init_params = nothing, 
-                    draw_samples = 1000, physdt = 1 / 20.0f0,l2std = [0.05],
-                    phystd = [0.05], priorsNNw = (0.0, 2.0),
-                    param = [],nchains = 1,autodiff = false, Kernel = HMC,
-                    Integrator = Leapfrog, Adaptor = StanHMCAdaptor,
-                    targetacceptancerate = 0.8, Metric = DiagEuclideanMetric,
-                    jitter_rate = 3.0, tempering_rate = 3.0, max_depth = 10,
-                    Δ_max = 1000, n_leapfrog = 10, δ = 0.65, λ = 0.3,
-                    progress = false,verbose = false)
-```
-!!! warn
-
-    Note that ahmc_bayesian_pinn_ode() only supports ODEs which are written in the out-of-place form, i.e.
-    `du = f(u,p,t)`, and not `f(du,u,p,t)`. If not declared out-of-place, then the ahmc_bayesian_pinn_ode()
-    will exit with an error.
-
-## Example
-linear = (u, p, t) -> -u / p[1] + exp(t / p[2]) * cos(t)
-tspan = (0.0, 10.0)
-u0 = 0.0
-p = [5.0, -5.0]
-prob = ODEProblem(linear, u0, tspan, p)
-
-# CREATE DATASET (Necessity for accurate Parameter estimation)
-sol = solve(prob, Tsit5(); saveat = 0.05)
-u = sol.u[1:100]
-time = sol.t[1:100]
-
-# dataset and BPINN create
-x̂ = collect(Float64, Array(u) + 0.05 * randn(size(u)))
-dataset = [x̂, time]
-
-chainflux1 = Flux.Chain(Flux.Dense(1, 5, tanh), Flux.Dense(5, 5, tanh), Flux.Dense(5, 1)
-
-# simply solving ode here hence better to not pass dataset(uses ode params specified in prob)
-fh_mcmc_chainflux1, fhsamplesflux1, fhstatsflux1 = ahmc_bayesian_pinn_ode(prob,chainflux1,
-                                                                          dataset = dataset,
-                                                                          draw_samples = 1500,
-                                                                          l2std = [0.05],
-                                                                          phystd = [0.05],
-                                                                          priorsNNw = (0.0,3.0))
-
-# solving ode + estimating parameters hence dataset needed to optimize parameters upon + Pior Distributions for ODE params
-fh_mcmc_chainflux2, fhsamplesflux2, fhstatsflux2 = ahmc_bayesian_pinn_ode(prob,chainflux1,
-                                                                          dataset = dataset,
-                                                                          draw_samples = 1500,
-                                                                          l2std = [0.05],
-                                                                          phystd = [0.05],
-                                                                          priorsNNw = (0.0,3.0),
-                                                                          param = [Normal(6.5,0.5),Normal(-3,0.5)])
-
-## NOTES 
-Dataset is required for accurate Parameter estimation + solving equations
-Incase you are only solving the Equations for solution, do not provide dataset
-
-## Positional Arguments
-* `prob`: DEProblem(out of place and the function signature should be f(u,p,t)
-* `chain`: Lux/Flux Neural Netork which would be made the Bayesian PINN
-
-## Keyword Arguments
-* `strategy`: The training strategy used to choose the points for the evaluations. By default GridTraining is used with given physdt discretization.
-* `dataset`: Vector containing Vectors of corresponding u,t values 
-* `init_params`: intial parameter values for BPINN (ideally for multiple chains different initializations preferred)
-* `nchains`: number of chains you want to sample (random initialisation of params by default)
-* `draw_samples`: number of samples to be drawn in the MCMC algorithms (warmup samples are ~2/3 of draw samples)
-* `l2std`: standard deviation of BPINN predicition against L2 losses/Dataset
-* `phystd`: standard deviation of BPINN predicition against Chosen Underlying ODE System
-* `priorsNNw`: Vector of [mean, std] for BPINN parameter. Weights and Biases of BPINN are Normal Distributions by default
-* `param`: Vector of chosen ODE parameters Distributions in case of Inverse problems.
-* `autodiff`: Boolean Value for choice of Derivative Backend(default is numerical)
-* `physdt`: Timestep for approximating ODE in it's Time domain. (1/20.0 by default)
-
-# AHMC.jl is still developing convenience structs so might need changes on new releases.
-* `Kernel`: Choice of MCMC Sampling Algorithm (AdvancedHMC.jl implemenations HMC/NUTS/HMCDA)
-* `targetacceptancerate`: Target percentage(in decimal) of iterations in which the proposals were accepted(0.8 by default)
-* `Integrator(jitter_rate, tempering_rate), Metric, Adaptor`: https://turinglang.org/AdvancedHMC.jl/stable/
-* `max_depth`: Maximum doubling tree depth (NUTS)
-* `Δ_max`: Maximum divergence during doubling tree (NUTS)
-* `n_leapfrog`: number of leapfrog steps for HMC
-* `δ`: target acceptance probability for NUTS/HMCDA
-* `λ`: target trajectory length for HMCDA
-* `progress`: controls whether to show the progress meter or not.
-* `verbose`: controls the verbosity. (Sample call args in AHMC)
-
-"""
-
-"""
-dataset would be (x̂,t)
-priors: pdf for W,b + pdf for ODE params
-"""
-function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain;
-    strategy = GridTraining, dataset = [nothing],
-    init_params = nothing, draw_samples = 1000,
-    physdt = 1 / 20.0, l2std = [0.05],
-    phystd = [0.05], priorsNNw = (0.0, 2.0),
-    param = [], nchains = 1, autodiff = false,
-    Kernel = HMC, Integrator = Leapfrog,
-    Adaptor = StanHMCAdaptor, targetacceptancerate = 0.8,
-    Metric = DiagEuclideanMetric, jitter_rate = 3.0,
-    tempering_rate = 3.0, max_depth = 10, Δ_max = 1000,
-    n_leapfrog = 10, δ = 0.65, λ = 0.3, progress = false,
-    verbose = false)
-
-    # NN parameter prior mean and variance(PriorsNN must be a tuple)
-    if isinplace(prob)
-        throw(error("The BPINN ODE solver only supports out-of-place ODE definitions, i.e. du=f(u,p,t)."))
-    end
-
-    strategy = strategy == GridTraining ? strategy(physdt) : strategy
-
-    if dataset != [nothing] &&
-       (length(dataset) < 2 || !(typeof(dataset) <: Vector{<:Vector{<:AbstractFloat}}))
-        throw(error("Invalid dataset. dataset would be timeseries (x̂,t) where type: Vector{Vector{AbstractFloat}"))
-    end
-
-    if dataset != [nothing] && param == []
-        println("Dataset is only needed for Parameter Estimation + Forward Problem, not in only Forward Problem case.")
-    elseif dataset == [nothing] && param != []
-        throw(error("Dataset Required for Parameter Estimation."))
-    end
-
-    if chain isa Lux.AbstractExplicitLayer || chain isa Flux.Chain
-        # Flux-vector, Lux-Named Tuple
-        initial_nnθ, recon, st = generate_Tar(chain, init_params)
-    else
-        error("Only Lux.AbstractExplicitLayer and Flux.Chain neural networks are supported")
-    end
-
-    if nchains > Threads.nthreads()
-        throw(error("number of chains is greater than available threads"))
-    elseif nchains < 1
-        throw(error("number of chains must be greater than 1"))
-    end
-
-    # eltype(physdt) cause needs Float64 for find_good_stepsize
-    if chain isa Lux.AbstractExplicitLayer
-        # Lux chain(using component array later as vector_to_parameter need namedtuple)
-        initial_θ = collect(eltype(physdt),
-            vcat(ComponentArrays.ComponentArray(initial_nnθ)))
-    else
-        initial_θ = collect(eltype(physdt), initial_nnθ)
-    end
-
-    # adding ode parameter estimation
-    nparameters = length(initial_θ)
-    ninv = length(param)
-    priors = [
-        MvNormal(priorsNNw[1] * ones(nparameters),
-            LinearAlgebra.Diagonal(map(abs2, priorsNNw[2] .* ones(nparameters)))),
-    ]
-
-    # append Ode params to all paramvector
-    if ninv > 0
-        # shift ode params(initialise ode params by prior means)
-        initial_θ = vcat(initial_θ, [Distributions.params(param[i])[1] for i in 1:ninv])
-        priors = vcat(priors, param)
-        nparameters += ninv
-    end
-
-    t0 = prob.tspan[1]
-    # dimensions would be total no of params,initial_nnθ for Lux namedTuples
-    ℓπ = LogTargetDensity(nparameters, prob, recon, st, strategy, dataset, priors,
-        phystd, l2std, autodiff, physdt, ninv, initial_nnθ)
-
-    try
-        ℓπ(t0, initial_θ[1:(nparameters - ninv)])
-    catch err
-        if isa(err, DimensionMismatch)
-            throw(DimensionMismatch("Dimensions of the initial u0 and chain should match"))
-        else
-            throw(err)
-        end
-    end
-
-    # Define Hamiltonian system (nparameters ~ dimensionality of the sampling space)
-    metric = Metric(nparameters)
-    hamiltonian = Hamiltonian(metric, ℓπ, ForwardDiff)
-
-    println("physics Logpdf is : ", physloglikelihood(ℓπ, initial_θ))
-    println("prior Logpdf is : ", priorweights(ℓπ, initial_θ))
-    println("L2lossData Logpdf is : ", L2LossData(ℓπ, initial_θ))
-    println("L2loss2 Logpdf is : ", L2loss2(ℓπ, initial_θ))
-
-    # parallel sampling option
-    if nchains != 1
-        # Cache to store the chains
-        chains = Vector{Any}(undef, nchains)
-        statsc = Vector{Any}(undef, nchains)
-        samplesc = Vector{Any}(undef, nchains)
-
-        Threads.@threads for i in 1:nchains
-            # each chain has different initial NNparameter values(better posterior exploration)
-            initial_θ = vcat(randn(nparameters - ninv),
-                initial_θ[(nparameters - ninv + 1):end])
-            initial_ϵ = find_good_stepsize(hamiltonian, initial_θ)
-            integrator = integratorchoice(Integrator, initial_ϵ, jitter_rate,
-                tempering_rate)
-            adaptor = adaptorchoice(Adaptor, MassMatrixAdaptor(metric),
-                StepSizeAdaptor(targetacceptancerate, integrator))
-            Kernel = AdvancedHMC.make_kernel(kernelchoice(Kernel, max_depth, Δ_max,
-                    n_leapfrog, δ, λ), integrator)
-            samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples, adaptor;
-                progress = progress, verbose = verbose)
-
-            samplesc[i] = samples
-            statsc[i] = stats
-            mcmc_chain = Chains(hcat(samples...)')
-            chains[i] = mcmc_chain
-        end
-
-        return chains, samplesc, statsc
-    else
-        initial_ϵ = find_good_stepsize(hamiltonian, initial_θ)
-        integrator = integratorchoice(Integrator, initial_ϵ, jitter_rate, tempering_rate)
-        adaptor = adaptorchoice(Adaptor, MassMatrixAdaptor(metric),
-            StepSizeAdaptor(targetacceptancerate, integrator))
-        Kernel = AdvancedHMC.make_kernel(kernelchoice(Kernel, max_depth, Δ_max, n_leapfrog,
-                δ, λ), integrator)
-        samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples,
-            adaptor; progress = progress, verbose = verbose)
-
-        # return a chain(basic chain),samples and stats
-        matrix_samples = hcat(samples...)
-        mcmc_chain = MCMCChains.Chains(matrix_samples')
-        return mcmc_chain, samples, stats
-    end
-end
\ No newline at end of file
diff --git a/test/BPINN_newform.jl b/test/BPINN_newform.jl
deleted file mode 100644
index fa2f04073e..0000000000
--- a/test/BPINN_newform.jl
+++ /dev/null
@@ -1,4354 +0,0 @@
-# # Testing Code
-using Test, MCMCChains
-using ForwardDiff, Distributions, OrdinaryDiffEq
-using Flux, OptimizationOptimisers, AdvancedHMC, Lux
-using Statistics, Random, Functors, ComponentArrays
-using NeuralPDE, MonteCarloMeasurements
-
-# note that current testing bounds can be easily further tightened but have been inflated for support for Julia build v1
-# on latest Julia version it performs much better for below tests
-Random.seed!(100)
-
-# for sampled params->lux ComponentArray
-function vector_to_parameters(ps_new::AbstractVector, ps::NamedTuple)
-    @assert length(ps_new) == Lux.parameterlength(ps)
-    i = 1
-    function get_ps(x)
-        z = reshape(view(ps_new, i:(i + length(x) - 1)), size(x))
-        i += length(x)
-        return z
-    end
-    return Functors.fmap(get_ps, ps)
-end
-
-## PROBLEM-1 (WITHOUT PARAMETER ESTIMATION)
-linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π)
-linear = (u, p, t) -> cos(2 * π * t)
-tspan = (0.0, 2.0)
-u0 = 0.0
-prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan)
-p = prob.p
-
-# Numerical and Analytical Solutions: testing ahmc_bayesian_pinn_ode()
-ta = range(tspan[1], tspan[2], length = 300)
-u = [linear_analytic(u0, nothing, ti) for ti in ta]
-x̂ = collect(Float64, Array(u) + 0.02 * randn(size(u)))
-time = vec(collect(Float64, ta))
-physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# testing points for solve() call must match saveat(1/50.0) arg
-ta0 = range(tspan[1], tspan[2], length = 101)
-u1 = [linear_analytic(u0, nothing, ti) for ti in ta0]
-x̂1 = collect(Float64, Array(u1) + 0.02 * randn(size(u1)))
-time1 = vec(collect(Float64, ta0))
-physsol0_1 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
-
-chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64
-chainlux = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1))
-init1, re1 = destructure(chainflux)
-θinit, st = Lux.setup(Random.default_rng(), chainlux)
-
-fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chainflux,
-    draw_samples = 2500,
-    n_leapfrog = 30)
-
-fh_mcmc_chain2, fhsamples2, fhstats2 = ahmc_bayesian_pinn_ode(prob, chainlux,
-    draw_samples = 2500,
-    n_leapfrog = 30)
-
-# can change training strategies by adding this to call (Quadratuer and GridTraining show good results but stochastics sampling techniques perform bad)
-# strategy = QuadratureTraining(; quadrature_alg = QuadGKJL(),
-#     reltol = 1e-6,
-#     abstol = 1e-3, maxiters = 1000,
-#     batch = 0)
-
-alg = NeuralPDE.BNNODE(chainflux, draw_samples = 2500,
-    n_leapfrog = 30)
-sol1flux = solve(prob, alg)
-
-alg = NeuralPDE.BNNODE(chainlux, draw_samples = 2500,
-    n_leapfrog = 30)
-sol1lux = solve(prob, alg)
-
-# testing points
-t = time
-# Mean of last 500 sampled parameter's curves(flux and lux chains)[Ensemble predictions]
-out = re1.(fhsamples1[(end - 500):end])
-yu = collect(out[i](t') for i in eachindex(out))
-fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)]
-meanscurve1 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean
-
-θ = [vector_to_parameters(fhsamples1[i], θinit) for i in 2000:2500]
-luxar = [chainlux(t', θ[i], st)[1] for i in 1:500]
-luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
-meanscurve2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
-
-# --------------------- ahmc_bayesian_pinn_ode() call
-@test mean(abs.(x̂ .- meanscurve1)) < 0.05
-@test mean(abs.(physsol1 .- meanscurve1)) < 0.005
-@test mean(abs.(x̂ .- meanscurve2)) < 0.05
-@test mean(abs.(physsol1 .- meanscurve2)) < 0.005
-
-#--------------------- solve() call 
-@test mean(abs.(x̂1 .- sol1flux.ensemblesol[1])) < 0.05
-@test mean(abs.(physsol0_1 .- sol1flux.ensemblesol[1])) < 0.05
-@test mean(abs.(x̂1 .- sol1lux.ensemblesol[1])) < 0.05
-@test mean(abs.(physsol0_1 .- sol1lux.ensemblesol[1])) < 0.05
-
-## PROBLEM-1 (WITH PARAMETER ESTIMATION)
-linear_analytic = (u0, p, t) -> u0 + sin(p * t) / (p)
-linear = (u, p, t) -> cos(p * t)
-tspan = (0.0, 2.0)
-u0 = 0.0
-p = 2 * pi
-prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan, p)
-
-# Numerical and Analytical Solutions
-sol1 = solve(prob, Tsit5(); saveat = 0.01)
-u = sol1.u
-time = sol1.t
-
-# BPINN AND TRAINING DATASET CREATION(dataset must be defined only inside problem timespan!)
-ta = range(tspan[1], tspan[2], length = 25)
-u = [linear_analytic(u0, p, ti) for ti in ta]
-x̂ = collect(Float64, Array(u) .+ (0.2 .* Array(u) .* randn(size(u))))
-time = vec(collect(Float64, ta))
-dataset = [x̂, time]
-physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# testing points for solve call(saveat=1/50.0 ∴ at t = collect(eltype(saveat), prob.tspan[1]:saveat:prob.tspan[2] internally estimates)
-ta0 = range(tspan[1], tspan[2], length = 101)
-u1 = [linear_analytic(u0, p, ti) for ti in ta0]
-x̂1 = collect(Float64, Array(u1) + 0.2 * randn(size(u1)))
-time1 = vec(collect(Float64, ta0))
-physsol1_1 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
-
-using Plots, StatsPlots
-# plot(dataset[2], calderivatives(dataset)')
-yu = collect(prob.tspan[1]:(1 / 50.0):prob.tspan[2])
-plot(yu, [linear_analytic(u0, p, t) for t in yu])
-chainflux1 = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64
-chainlux1 = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1))
-init1, re1 = destructure(chainflux1)
-θinit, st = Lux.setup(Random.default_rng(), chainlux1)
-
-fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chainflux1,
-    dataset = dataset,
-    draw_samples = 2500,
-    physdt = 1 / 50.0f0,
-    priorsNNw = (0.0,
-        3.0),
-    param = [
-        LogNormal(9,
-            0.5),
-    ],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30)
-
-fh_mcmc_chain2, fhsamples2, fhstats2 = ahmc_bayesian_pinn_ode(prob, chainlux1,
-    dataset = dataset,
-    draw_samples = 2500,
-    physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30)
-
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 10.0),
-    l2std = [0.005], phystd = [0.01],
-    param = [Normal(11, 6)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30)
-# original paper (pure data 0 1)
-sol1flux = solve(prob, alg)
-sol1flux.estimated_ode_params
-# pure data method 1 1
-sol2flux = solve(prob, alg)
-sol2flux.estimated_ode_params
-# pure data method 1 0
-sol3flux = solve(prob, alg)
-sol3flux.estimated_ode_params
-# deri collocation
-sol4flux = solve(prob, alg)
-sol4flux.estimated_ode_params
-# collocation
-sol5flux = solve(prob, alg)
-sol5flux.estimated_ode_params
-# collocation + L2Data loss(at 9,0.5 1,2 gives same)
-sol6flux = solve(prob, alg)
-sol6flux.estimated_ode_params
-# 2500 iters
-sol7flux = solve(prob, alg)
-sol7flux.estimated_ode_params
-
-plotly()
-plot!(yu, sol1flux.ensemblesol[1])
-plot!(yu, sol2flux.ensemblesol[1])
-plot!(yu, sol3flux.ensemblesol[1])
-plot!(yu, sol4flux.ensemblesol[1])
-plot!(yu, sol5flux.ensemblesol[1])
-plot!(yu, sol6flux.ensemblesol[1])
-
-plot!(dataset[2], dataset[1])
-
-# plot!(sol4flux.ensemblesol[1])
-# plot!(sol5flux.ensemblesol[1])
-
-sol2flux.estimated_ode_params
-
-sol1flux.estimated_ode_params
-
-sol3flux.estimated_ode_params
-
-sol4flux.estimated_ode_params
-
-sol5flux.estimated_ode_params
-
-alg = NeuralPDE.BNNODE(chainlux1, dataset = dataset,
-    draw_samples = 2500,
-    physdt = 1 / 50.0f0,
-    priorsNNw = (0.0,
-        3.0),
-    param = [
-        LogNormal(9,
-            0.5),
-    ],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30)
-
-sol2lux = solve(prob, alg)
-
-# testing points
-t = time
-# Mean of last 500 sampled parameter's curves(flux and lux chains)[Ensemble predictions]
-out = re1.([fhsamples1[i][1:22] for i in 2000:2500])
-yu = collect(out[i](t') for i in eachindex(out))
-fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)]
-meanscurve1 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean
-
-θ = [vector_to_parameters(fhsamples2[i][1:(end - 1)], θinit) for i in 2000:2500]
-luxar = [chainlux1(t', θ[i], st)[1] for i in 1:500]
-luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
-meanscurve2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
-
-# --------------------- ahmc_bayesian_pinn_ode() call  
-@test mean(abs.(physsol1 .- meanscurve1)) < 0.15
-@test mean(abs.(physsol1 .- meanscurve2)) < 0.15
-
-# ESTIMATED ODE PARAMETERS (NN1 AND NN2)
-@test abs(p - mean([fhsamples2[i][23] for i in 2000:2500])) < abs(0.25 * p)
-@test abs(p - mean([fhsamples1[i][23] for i in 2000:2500])) < abs(0.25 * p)
-
-#-------------------------- solve() call  
-@test mean(abs.(physsol1_1 .- sol2flux.ensemblesol[1])) < 8e-2
-@test mean(abs.(physsol1_1 .- sol2lux.ensemblesol[1])) < 8e-2
-
-# ESTIMATED ODE PARAMETERS (NN1 AND NN2)
-@test abs(p - sol1flux.estimated_ode_params[1]) < abs(0.15 * p)
-@test abs(p - sol2lux.estimated_ode_params[1]) < abs(0.15 * p)
-
-## PROBLEM-2
-linear = (u, p, t) -> u / p + exp(t / p) * cos(t)
-tspan = (0.0, 10.0)
-u0 = 0.0
-p = -5.0
-prob = ODEProblem(linear, u0, tspan, p)
-linear_analytic = (u0, p, t) -> exp(t / p) * (u0 + sin(t))
-
-# SOLUTION AND CREATE DATASET
-sol = solve(prob, Tsit5(); saveat = 0.1)
-u = sol.u
-time = sol.t
-x̂ = u .+ (u .* 0.2) .* randn(size(u))
-dataset = [x̂, time]
-t = sol.t
-physsol1 = [linear_analytic(prob.u0, p, t[i]) for i in eachindex(t)]
-
-ta0 = range(tspan[1], tspan[2], length = 501)
-u1 = [linear_analytic(u0, p, ti) for ti in ta0]
-time1 = vec(collect(Float64, ta0))
-physsol2 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
-
-chainflux12 = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 6, tanh),
-    Flux.Dense(6, 1)) |> Flux.f64
-chainlux12 = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh), Lux.Dense(6, 1))
-init1, re1 = destructure(chainflux12)
-θinit, st = Lux.setup(Random.default_rng(), chainlux12)
-
-using Flux
-using Random
-
-function derivatives(chainflux, dataset)
-    loss(x, y) = Flux.mse(chainflux(x), y)
-    optimizer = Flux.Optimise.ADAM(0.01)
-    epochs = 2500
-    for epoch in 1:epochs
-        Flux.train!(loss, Flux.params(chainflux), [(dataset[2]', dataset[1]')], optimizer)
-    end
-    getgradient(chainflux, dataset)
-end
-
-function getgradient(chainflux, dataset)
-    return (chainflux(dataset[end]' .+ sqrt(eps(eltype(Float64)))) .-
-            chainflux(dataset[end]')) ./
-           sqrt(eps(eltype(dataset[end][1])))
-end
-
-ans = derivatives(chainflux12, dataset)
-
-init3, re = destructure(chainflux12)
-init2 == init1
-init3 == init2
-plot!(dataset[end], ans')
-plot!(dataset[end], chainflux12(dataset[end]')')
-
-ars = getgradient(chainflux12, dataset)
-
-plot!(dataset[end], ars')
-
-fh_mcmc_chainflux12, fhsamplesflux12, fhstatsflux12 = ahmc_bayesian_pinn_ode(prob,
-    chainflux12,
-    draw_samples = 1500,
-    l2std = [0.03],
-    phystd = [
-        0.03],
-    priorsNNw = (0.0,
-        10.0),
-    n_leapfrog = 30)
-
-fh_mcmc_chainflux22, fhsamplesflux22, fhstatsflux22 = ahmc_bayesian_pinn_ode(prob,
-    chainflux12,
-    dataset = dataset,
-    draw_samples = 1500,
-    l2std = [0.03],
-    phystd = [
-        0.03,
-    ],
-    priorsNNw = (0.0,
-        10.0),
-    param = [
-        Normal(-7,
-            4),
-    ],
-    n_leapfrog = 30)
-
-fh_mcmc_chainlux12, fhsampleslux12, fhstatslux12 = ahmc_bayesian_pinn_ode(prob, chainlux12,
-    draw_samples = 1500,
-    l2std = [0.03],
-    phystd = [0.03],
-    priorsNNw = (0.0,
-        10.0),
-    n_leapfrog = 30)
-
-fh_mcmc_chainlux22, fhsampleslux22, fhstatslux22 = ahmc_bayesian_pinn_ode(prob, chainlux12,
-    dataset = dataset,
-    draw_samples = 1500,
-    l2std = [0.03],
-    phystd = [0.03],
-    priorsNNw = (0.0,
-        10.0),
-    param = [
-        Normal(-7,
-            4),
-    ],
-    n_leapfrog = 30)
-
-alg1 = NeuralPDE.BNNODE(chainflux12,
-    dataset = dataset,
-    draw_samples = 500,
-    l2std = [0.01],
-    phystd = [
-        0.03,
-    ],
-    priorsNNw = (0.0,
-        10.0),
-    param = [
-        Normal(-7,
-            4),
-    ],
-    n_leapfrog = 30, progress = true)
-
-# original paper (pure data 0 1)
-sol1flux_pestim = solve(prob, alg1)
-sol1flux_pestim.estimated_ode_params
-# pure data method 1 1
-sol2flux_pestim = solve(prob, alg1)
-sol2flux_pestim.estimated_ode_params
-# pure data method 1 0
-sol3flux_pestim = solve(prob, alg1)
-sol3flux_pestim.estimated_ode_params
-# deri collocation
-sol4flux_pestim = solve(prob, alg1)
-sol4flux_pestim.estimated_ode_params
-# collocation
-sol5flux_pestim = solve(prob, alg1)
-sol5flux_pestim.estimated_ode_params
-# collocation + L2Data loss(at 9,0.5 1,2 gives same)
-sol6flux_pestim = solve(prob, alg1)
-sol6flux_pestim.estimated_ode_params
-
-using Plots, StatsPlots
-ars = collect(prob.tspan[1]:(1 / 50.0):prob.tspan[2])
-plot(time, u)
-plot!(ars, sol1flux_pestim.ensemblesol[1])
-plot!(ars, sol2flux_pestim.ensemblesol[1])
-plot!(ars, sol3flux_pestim.ensemblesol[1])
-plot!(ars, sol4flux_pestim.ensemblesol[1])
-plot!(ars, sol5flux_pestim.ensemblesol[1])
-plot!(ars, sol6flux_pestim.ensemblesol[1])
-
-sol3flux_pestim.estimated_ode_params
-
-sol4flux_pestim.estimated_ode_params
-
-sol5flux_pestim.estimated_ode_params
-
-sol6flux_pestim.estimated_ode_params
-
-ars = collect(prob.tspan[1]:(1 / 50.0):prob.tspan[2])
-
-init, re1 = destructure(chainflux12)
-init
-init1
-alg = NeuralPDE.BNNODE(chainlux12,
-    dataset = dataset,
-    draw_samples = 1500,
-    l2std = [0.03],
-    phystd = [0.03],
-    priorsNNw = (0.0,
-        10.0),
-    param = [
-        Normal(-7,
-            4),
-    ],
-    n_leapfrog = 30)
-
-sol3lux_pestim = solve(prob, alg)
-
-# testing timepoints
-t = sol.t
-#------------------------------ ahmc_bayesian_pinn_ode() call 
-# Mean of last 500 sampled parameter's curves(flux chains)[Ensemble predictions]
-out = re1.([fhsamplesflux12[i][1:61] for i in 1000:1500])
-yu = [out[i](t') for i in eachindex(out)]
-fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)]
-meanscurve1_1 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean
-
-out = re1.([fhsamplesflux22[i][1:61] for i in 1000:1500])
-yu = [out[i](t') for i in eachindex(out)]
-fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)]
-meanscurve1_2 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean
-
-@test mean(abs.(sol.u .- meanscurve1_1)) < 1e-2
-@test mean(abs.(physsol1 .- meanscurve1_1)) < 1e-2
-@test mean(abs.(sol.u .- meanscurve1_2)) < 5e-2
-@test mean(abs.(physsol1 .- meanscurve1_2)) < 5e-2
-
-# estimated parameters(flux chain)
-param1 = mean(i[62] for i in fhsamplesflux22[1000:1500])
-@test abs(param1 - p) < abs(0.3 * p)
-
-# Mean of last 500 sampled parameter's curves(lux chains)[Ensemble predictions]
-θ = [vector_to_parameters(fhsampleslux12[i], θinit) for i in 1000:1500]
-luxar = [chainlux12(t', θ[i], st)[1] for i in 1:500]
-luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
-meanscurve2_1 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
-
-θ = [vector_to_parameters(fhsampleslux22[i][1:(end - 1)], θinit) for i in 1000:1500]
-luxar = [chainlux12(t', θ[i], st)[1] for i in 1:500]
-luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
-meanscurve2_2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
-
-@test mean(abs.(sol.u .- meanscurve2_1)) < 1e-1
-@test mean(abs.(physsol1 .- meanscurve2_1)) < 1e-1
-@test mean(abs.(sol.u .- meanscurve2_2)) < 5e-2
-@test mean(abs.(physsol1 .- meanscurve2_2)) < 5e-2
-
-# estimated parameters(lux chain)
-param1 = mean(i[62] for i in fhsampleslux22[1000:1500])
-@test abs(param1 - p) < abs(0.3 * p)
-
-#-------------------------- solve() call 
-# (flux chain)
-@test mean(abs.(physsol2 .- sol3flux_pestim.ensemblesol[1])) < 0.15
-# estimated parameters(flux chain)
-param1 = sol3flux_pestim.estimated_ode_params[1]
-@test abs(param1 - p) < abs(0.45 * p)
-
-# (lux chain)
-@test mean(abs.(physsol2 .- sol3lux_pestim.ensemblesol[1])) < 0.15
-# estimated parameters(lux chain)
-param1 = sol3lux_pestim.estimated_ode_params[1]
-@test abs(param1 - p) < abs(0.45 * p)
-
-using Plots, StatsPlots
-using NoiseRobustDifferentiation, Weave, DataInterpolations
-
-# # ----------------------------------------------------------
-# # physdt=1/20, Full likelihood
-# # 25 points 
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
-#     draw_samples = 1500, physdt = 1 / 50.0f0, phystd = [0.01],
-#     l2std = [0.01],
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux1 = solve(prob, alg)
-# sol2flux1.estimated_ode_params[1] #6.41722 Particles{Float64, 1}, 6.02404 Particles{Float64, 1}
-# sol2flux2 = solve(prob, alg)
-# sol2flux2.estimated_ode_params[1] #6.42782 Particles{Float64, 1}, 6.07509 Particles{Float64, 1}
-# sol2flux3 = solve(prob, alg)
-# sol2flux3.estimated_ode_params[1] #6.42782 Particles{Float64, 1}, 6.00825 Particles{Float64, 1}
-
-# # 50 points 
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux11 = solve(prob, alg)
-# sol2flux11.estimated_ode_params[1] #5.71268 Particles{Float64, 1}, 6.07242 Particles{Float64, 1}
-# sol2flux22 = solve(prob, alg)
-# sol2flux22.estimated_ode_params[1] #5.74599 Particles{Float64, 1}, 6.04837 Particles{Float64, 1}
-# sol2flux33 = solve(prob, alg)
-# sol2flux33.estimated_ode_params[1] #5.74599 Particles{Float64, 1}, 6.02838 Particles{Float64, 1}
-
-# # 100 points 
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux111 = solve(prob, alg)
-# sol2flux111.estimated_ode_params[1] #6.59097 Particles{Float64, 1}, 5.89384 Particles{Float64, 1}
-# sol2flux222 = solve(prob, alg)
-# sol2flux222.estimated_ode_params[1] #6.62813 Particles{Float64, 1}, 5.88216 Particles{Float64, 1}
-# sol2flux333 = solve(prob, alg)
-# sol2flux333.estimated_ode_params[1] #6.62813 Particles{Float64, 1}, 5.85327 Particles{Float64, 1}
-
-# # ----------------------------------------------------------
-# # physdt=1/20, full likelihood cdm
-# # 25 points 
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux1_cdm = solve(prob, alg)
-# sol2flux1_cdm.estimated_ode_params[1]# 6.50506 Particles{Float64, 1} ,6.38963 Particles{Float64, 1}
-# sol2flux2_cdm = solve(prob, alg)
-# sol2flux2_cdm.estimated_ode_params[1] #6.50032 Particles{Float64, 1} ,6.39817 Particles{Float64, 1}
-# sol2flux3_cdm = solve(prob, alg)
-# sol2flux3_cdm.estimated_ode_params[1] #6.50032 Particles{Float64, 1} ,6.36296 Particles{Float64, 1}
-
-# # 50 points 
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux11_cdm = solve(prob, alg)
-# sol2flux11_cdm.estimated_ode_params[1] #6.52951 Particles{Float64, 1},5.15621 Particles{Float64, 1}
-# sol2flux22_cdm = solve(prob, alg)
-# sol2flux22_cdm.estimated_ode_params[1] #6.54988 Particles{Float64, 1},5.16363 Particles{Float64, 1}
-# sol2flux33_cdm = solve(prob, alg)
-# sol2flux33_cdm.estimated_ode_params[1] #6.54988 Particles{Float64, 1},5.15591 Particles{Float64, 1}
-
-# # 100 points 
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux111_cdm = solve(prob, alg)
-# sol2flux111_cdm.estimated_ode_params[1] #6.74338 Particles{Float64, 1}, 9.72422 Particles{Float64, 1}
-# sol2flux222_cdm = solve(prob, alg)
-# sol2flux222_cdm.estimated_ode_params[1] #6.72642 Particles{Float64, 1}, 9.71991 Particles{Float64, 1}
-# sol2flux333_cdm = solve(prob, alg)
-# sol2flux333_cdm.estimated_ode_params[1] #6.72642 Particles{Float64, 1}, 9.75045 Particles{Float64, 1}
-
-# --------------------------------------------------------------------------------------
-#                              NEW SERIES OF TESTS (IN ORDER OF EXECUTION)
-#  -------------------------------------------------------------------------------------
-# original paper implementaion
-# 25 points
-ta = range(tspan[1], tspan[2], length = 25)
-u = [linear_analytic(u0, p, ti) for ti in ta]
-x̂ = collect(Float64, u .+ 0.05 * randn(size(u)))
-time = vec(collect(Float64, ta))
-dataset1 = [x̂, time]
-physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-# scatter!(time, u)
-# dataset
-# scatter!(dataset1[2], dataset1[1])
-# plot(time, physsol1)
-
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux1_normal = solve(prob, alg)
-sol2flux1_normal.estimated_ode_params[1]  #7.70593 Particles{Float64, 1}, 6.36096 Particles{Float64, 1} | 6.45865 Particles{Float64, 1}
-sol2flux2_normal = solve(prob, alg)
-sol2flux2_normal.estimated_ode_params[1] #6.66347 Particles{Float64, 1}, 6.36974 Particles{Float64, 1} | 6.45865 Particles{Float64, 1}
-sol2flux3_normal = solve(prob, alg)
-sol2flux3_normal.estimated_ode_params[1] #6.84827 Particles{Float64, 1}, 6.29555 Particles{Float64, 1} | 6.39947 Particles{Float64, 1}
-
-# 50 points
-ta = range(tspan[1], tspan[2], length = 50)
-u = [linear_analytic(u0, p, ti) for ti in ta]
-x̂ = collect(Float64, Array(u) + 0.05 * randn(size(u)))
-time = vec(collect(Float64, ta))
-dataset2 = [x̂, time]
-physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux11_normal = solve(prob, alg)
-sol2flux11_normal.estimated_ode_params[1] #7.83577 Particles{Float64, 1},6.24652 Particles{Float64, 1} | 6.34495 Particles{Float64, 1}
-sol2flux22_normal = solve(prob, alg)
-sol2flux22_normal.estimated_ode_params[1] #6.49477 Particles{Float64, 1},6.2118 Particles{Float64, 1} | 6.32476 Particles{Float64, 1}
-sol2flux33_normal = solve(prob, alg)
-sol2flux33_normal.estimated_ode_params[1] #6.47421 Particles{Float64, 1},6.33687 Particles{Float64, 1} | 6.2448 Particles{Float64, 1}
-
-# 100 points
-ta = range(tspan[1], tspan[2], length = 100)
-u = [linear_analytic(u0, p, ti) for ti in ta]
-x̂ = collect(Float64, Array(u) + 0.05 * randn(size(u)))
-time = vec(collect(Float64, ta))
-dataset3 = [x̂, time]
-physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux111_normal = solve(prob, alg)
-sol2flux111_normal.estimated_ode_params[1] #5.96604 Particles{Float64, 1},5.99588 Particles{Float64, 1} | 6.19805 Particles{Float64, 1}
-sol2flux222_normal = solve(prob, alg)
-sol2flux222_normal.estimated_ode_params[1] #6.05432 Particles{Float64, 1},6.0768 Particles{Float64, 1} | 6.22948 Particles{Float64, 1}
-sol2flux333_normal = solve(prob, alg)
-sol2flux333_normal.estimated_ode_params[1] #6.08856 Particles{Float64, 1},5.94819 Particles{Float64, 1} | 6.2551 Particles{Float64, 1}
-
-# LOTKA VOLTERRA CASE 
-function lotka_volterra(u, p, t)
-    # Model parameters.
-    α, β, γ, δ = p
-    # Current state.
-    x, y = u
-
-    # Evaluate differential equations.
-    dx = (α - β * y) * x # prey
-    dy = (δ * x - γ) * y # predator
-
-    return [dx, dy]
-end
-
-# initial-value problem.
-u01 = [1.0, 1.0]
-p1 = [1.5, 1.0, 3.0, 1.0]
-tspan1 = (0.0, 6.0)
-prob1 = ODEProblem(lotka_volterra, u01, tspan1, p1)
-
-# chainlux = Lux.Chain(Lux.Dense(1, 7, Lux.tanh), Lux.Dense(7, 7, Lux.tanh), Lux.Dense(7, 2))
-chainflux1 = Flux.Chain(Flux.Dense(1, 8, tanh), Flux.Dense(8, 8, tanh), Flux.Dense(8, 2))
-
-#testing timepoints must match keyword arg `saveat`` timepoints of solve() call
-t1 = collect(Float64, prob1.tspan[1]:(1 / 50.0):prob1.tspan[2])
-
-# --------------------------------------------------------------------------
-# original paper implementaion lotka volterra
-# 31 points  
-solution1 = solve(prob1, Tsit5(); saveat = 0.1)
-time1 = solution1.t
-physsol1_1 = solution1.u
-u1 = hcat(solution1.u...)
-x1 = u1[1, :] .+ 0.3 .* u1[1, :] .* randn(length(u1[1, :]))
-y1 = u1[2, :] .+ 0.3 .* u1[2, :] .* randn(length(u1[2, :]))
-dataset2_1 = [x1, y1, time1]
-plot(dataset2_1[end], dataset2_1[1])
-plot!(dataset2_1[end], dataset2_1[2])
-plot!(time1, u1[1, :])
-plot!(time1, u1[2, :])
-
-alg1 = NeuralPDE.BNNODE(chainflux1,
-    dataset = dataset2_1,
-    draw_samples = 1000,
-    physdt = 1 / 20.0,
-    l2std = [
-        0.2,
-        0.2,
-    ],
-    phystd = [
-        0.5,
-        0.5,
-    ],
-    priorsNNw = (0.0,
-        10.0),
-    param = [
-        Normal(4,
-            3),
-        Normal(-2,
-            4),
-        Normal(0,
-            5),
-        Normal(2.5,
-            2)],
-    n_leapfrog = 30, progress = true)
-
-# original paper (pure data 0 1)
-sol1flux1_lotka = solve(prob1, alg1)
-sol1flux1_lotka.estimated_ode_params
-# pure data method 1 1
-sol2flux1_lotka = solve(prob1, alg1)
-sol2flux1_lotka.estimated_ode_params
-# pure data method 1 0
-sol3flux1_lotka = solve(prob1, alg1)
-sol3flux1_lotka.estimated_ode_params
-# deri collocation
-sol4flux1_lotka = solve(prob1, alg1)
-sol4flux1_lotka.estimated_ode_params
-# collocation
-sol5flux1_lotka = solve(prob1, alg1)
-sol5flux1_lotka.estimated_ode_params
-# collocation + L2Data loss(at 9,0.5 1,2 gives same)
-sol6flux1_lotka = solve(prob1, alg1)
-sol6flux1_lotka.estimated_ode_params
-
-sol7flux1_lotka = solve(prob1, alg1)
-sol7flux1_lotka.estimated_ode_params
-
-using Plots, StatsPlots
-plot(dataset2_1[3], u1[1, :])
-plot!(dataset2_1[3], u1[2, :])
-plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol5flux1_normal.ensemblesol[2])
-plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]),
-    sol1flux1_normal.ensemblesol[1],
-    legend = :outerbottomleft)
-sol1flux2_normal = solve(prob1, alg1)
-sol1flux2_normal.estimated_ode_params  #|
-sol1flux3_normal = solve(prob1, alg1)
-sol1flux3_normal.estimated_ode_params  #|
-sol1flux4_normal = solve(prob1, alg1)
-sol1flux4_normal.estimated_ode_params
-
-plotly()
-plot!(title = "yuh")
-plot!(dataset2_1[3], dataset2_1[1])
-plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol1flux1_normal.ensemblesol[1])
-plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol1flux2_normal.ensemblesol[1])
-plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol1flux3_normal.ensemblesol[2])
-plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol1flux4_normal.ensemblesol[1])
-plot(time1, u1[1, :])
-plot!(time1, u1[2, :])
-
-ars = chainflux1(dataset2_1[end]')
-plot(ars[1, :])
-plot!(ars[2, :])
-
-function calculate_derivatives(dataset)
-    u = dataset[1]
-    u1 = dataset[2]
-    t = dataset[end]
-    # control points
-    n = Int(floor(length(t) / 10))
-    # spline for datasetvalues(solution) 
-    # interp = BSplineApprox(u, t, 4, 10, :Uniform, :Uniform)
-    interp = CubicSpline(u, t)
-    interp1 = CubicSpline(u1, t)
-    # derrivatives interpolation
-    dx = t[2] - t[1]
-    time = collect(t[1]:dx:t[end])
-    smoothu = [interp(i) for i in time]
-    smoothu1 = [interp1(i) for i in time]
-    # derivative of the spline (must match function derivative) 
-    û = tvdiff(smoothu, 20, 0.5, dx = dx, ε = 1)
-    û1 = tvdiff(smoothu1, 20, 0.5, dx = dx, ε = 1)
-    # tvdiff(smoothu, 100, 0.035, dx = dx, ε = 1)
-    # FDM
-    # û1 = diff(u) / dx
-    # dataset[1] and smoothu are almost equal(rounding errors)
-    return û, û1
-    # return 1
-end
-
-ar = calculate_derivatives(dataset2_1)
-plot(ar[1])
-plot!(ar[2])
-
-# 61 points
-solution1 = solve(prob1, Tsit5(); saveat = 0.1)
-time1 = solution1.t
-physsol1_1 = solution1.u
-u1 = hcat(solution1.u...)
-x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
-y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
-dataset2_2 = [x1, y1, time1]
-
-alg1 = NeuralPDE.BNNODE(chainlux,
-    dataset = dataset2_2,
-    draw_samples = 1000,
-    l2std = [
-        0.1,
-        0.1,
-    ],
-    phystd = [
-        0.1,
-        0.1,
-    ],
-    priorsNNw = (0.0,
-        5.0),
-    param = [
-        LogNormal(1.5,
-            0.5),
-        LogNormal(1.2,
-            0.5),
-        LogNormal(3.3,
-            1),
-        LogNormal(1.4,
-            1)],
-    n_leapfrog = 30, progress = true)
-
-sol1flux11_normal = solve(prob1, alg1)
-sol1flux11_normal.estimated_ode_params #|
-sol1flux22_normal = solve(prob1, alg1)
-sol1flux22_normal.estimated_ode_params #|
-sol1flux33_normal = solve(prob1, alg1)
-sol1flux33_normal.estimated_ode_params #|
-
-# 121 points
-solution1 = solve(prob1, Tsit5(); saveat = 0.05)
-time1 = solution1.t
-physsol1_1 = solution1.u
-u1 = hcat(solution1.u...)
-x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
-y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
-dataset2_3 = [x1, y1, time1]
-
-alg1 = NeuralPDE.BNNODE(chainlux,
-    dataset = dataset2_3,
-    draw_samples = 1000,
-    l2std = [
-        0.1,
-        0.1,
-    ],
-    phystd = [
-        0.1,
-        0.1,
-    ],
-    priorsNNw = (0.0,
-        5.0),
-    param = [
-        LogNormal(1.5,
-            0.5),
-        LogNormal(1.2,
-            0.5),
-        LogNormal(3.3,
-            1),
-        LogNormal(1.4,
-            1)],
-    n_leapfrog = 30, progress = true)
-
-sol1flux111_normal = solve(prob1, alg1)
-sol1flux111_normal.estimated_ode_params #|
-sol1flux222_normal = solve(prob1, alg1)
-sol1flux222_normal.estimated_ode_params #|
-sol1flux333_normal = solve(prob1, alg1)
-sol1flux333_normal.estimated_ode_params #| 
-
-# -------------------------------------------------------------------- 
-
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -640.4155412187399
-# Sampling 100%|███████████████████████████████| Time: 0:02:30
-
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -640.4155412187399
-# Sampling 100%|███████████████████████████████| Time: 0:01:54
-
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -640.4155412187399
-# Sampling 100%|███████████████████████████████| Time: 0:01:59
-
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -1198.9147562830894
-# Sampling 100%|███████████████████████████████| Time: 0:02:44
-
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -1198.9147562830894
-# Sampling 100%|███████████████████████████████| Time: 0:02:41
-
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -1198.9147562830894
-# Sampling 100%|███████████████████████████████| Time: 0:02:41
-
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -2473.741390504424
-# Sampling 100%|███████████████████████████████| Time: 0:03:52
-
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -2473.741390504424
-# Sampling 100%|███████████████████████████████| Time: 0:03:49
-
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -2473.741390504424
-# Sampling 100%|███████████████████████████████| Time: 0:03:50
-
-# # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
-# physics Logpdf is : -6.659143464386241e7
-# prior Logpdf is : -150.30074579848434
-# L2lossData Logpdf is : -6.03075717462954e6
-# Sampling 100%|███████████████████████████████| Time: 0:04:54
-
-# physics Logpdf is : -8.70012053004202e8
-# prior Logpdf is : -150.3750892952511
-# L2lossData Logpdf is : -6.967914805207133e6
-# Sampling 100%|███████████████████████████████| Time: 0:05:09
-
-# physics Logpdf is : -5.417241281343099e7
-# prior Logpdf is : -150.52079555737976
-# L2lossData Logpdf is : -4.195953436792884e6
-# Sampling 100%|███████████████████████████████| Time: 0:05:01
-
-# physics Logpdf is : -4.579552981943833e8
-# prior Logpdf is : -150.30491731974283
-# L2lossData Logpdf is : -8.595475827260146e6
-# Sampling 100%|███████████████████████████████| Time: 0:06:08
-
-# physics Logpdf is : -1.989281834955769e7
-# prior Logpdf is : -150.16009042727543
-# L2lossData Logpdf is : -1.121270659669029e7
-# Sampling 100%|███████████████████████████████| Time: 0:05:38
-
-# physics Logpdf is : -8.683829147264534e8
-# prior Logpdf is : -150.37824872259102
-# L2lossData Logpdf is : -1.0887662888035845e7
-# Sampling 100%|███████████████████████████████| Time: 0:05:50
-
-# physics Logpdf is : -3.1944760610332566e8
-# prior Logpdf is : -150.33610348737565
-# L2lossData Logpdf is : -1.215458786744478e7
-# Sampling 100%|███████████████████████████████| Time: 0:10:50
-
-# physics Logpdf is : -3.2884572300341567e6
-# prior Logpdf is : -150.21002268156343
-# L2lossData Logpdf is : -1.102536731511176e7
-# Sampling 100%|███████████████████████████████| Time: 0:09:53
-
-# physics Logpdf is : -5.31293521002414e8
-# prior Logpdf is : -150.20948536040126
-# L2lossData Logpdf is : -1.818717239584132e7
-# Sampling 100%|███████████████████████████████| Time: 0:08:53
-
-# ----------------------------------------------------------
-# Full likelihood no l2 only new L22(NN gradients)
-# 25 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux1_new = solve(prob, alg)
-sol2flux1_new.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} | 6.21662 Particles{Float64, 1}
-sol2flux2_new = solve(prob, alg)
-sol2flux2_new.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1} | 7.14238 Particles{Float64, 1}
-sol2flux3_new = solve(prob, alg)
-sol2flux3_new.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1} | 6.79159 Particles{Float64, 1}
-
-# 50 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux11_new = solve(prob, alg)
-sol2flux11_new.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1} | 5.33467 Particles{Float64, 1}
-sol2flux22_new = solve(prob, alg)
-sol2flux22_new.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1} | 6.52419 Particles{Float64, 1}
-sol2flux33_new = solve(prob, alg)
-sol2flux33_new.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} | 5.36921 Particles{Float64, 1}
-
-# 100 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux111_new = solve(prob, alg)
-sol2flux111_new.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1} | 6.45333 Particles{Float64, 1}
-sol2flux222_new = solve(prob, alg)
-sol2flux222_new.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1} | 4.64417 Particles{Float64, 1}
-sol2flux333_new = solve(prob, alg)
-sol2flux333_new.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1} | 5.88037 Particles{Float64, 1}
-# ---------------------------------------------------------------------------
-
-# ----------------------------------------------------------
-# Full likelihood  l2 + new L22(NN gradients)
-# 25 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux1_new_all = solve(prob, alg)
-sol2flux1_new_all.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} | 6.4358 Particles{Float64, 1}
-sol2flux2_new_all = solve(prob, alg)
-sol2flux2_new_all.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1} | 6.52449 Particles{Float64, 1}
-sol2flux3_new_all = solve(prob, alg)
-sol2flux3_new_all.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1} | 6.34188 Particles{Float64, 1}
-
-# 50 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux11_new_all = solve(prob, alg)
-sol2flux11_new_all.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1} | 6.37889 Particles{Float64, 1}
-sol2flux22_new_all = solve(prob, alg)
-sol2flux22_new_all.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1} | 6.34747 Particles{Float64, 1}
-sol2flux33_new_all = solve(prob, alg)
-sol2flux33_new_all.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} | 6.39699 Particles{Float64, 1}
-
-# 100 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux111_new_all = solve(prob, alg)
-sol2flux111_new_all.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1} | 6.24327 Particles{Float64, 1}
-sol2flux222_new_all = solve(prob, alg)
-sol2flux222_new_all.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1} | 6.23928 Particles{Float64, 1}
-sol2flux333_new_all = solve(prob, alg)
-sol2flux333_new_all.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1} | 6.2145 Particles{Float64, 1}
-
-# ---------------------------------------------------------------------------
-# Full likelihood  l2 + new L22(dataset gradients) lotka volterra
-# 36 points 
-alg1 = NeuralPDE.BNNODE(chainlux,
-    dataset = dataset2_1,
-    draw_samples = 1000,
-    l2std = [
-        0.01,
-        0.01,
-    ],
-    phystd = [
-        0.01,
-        0.01,
-    ],
-    priorsNNw = (0.0,
-        3.0),
-    param = [
-        LogNormal(1.5,
-            0.5),
-        LogNormal(1.2,
-            0.5),
-        LogNormal(3.3,
-            1),
-        LogNormal(1.4,
-            1)],
-    n_leapfrog = 30, progress = true)
-
-sol1flux1_new_all = solve(prob1, alg1)
-sol1flux1_new_all.estimated_ode_params[1]  #|
-sol1flux2_new_all = solve(prob1, alg1)
-sol1flux2_new_all.estimated_ode_params[1] #|
-sol1flux3_new_all = solve(prob1, alg1)
-sol1flux3_new_all.estimated_ode_params[1] #|
-
-# 61 points 
-alg1 = NeuralPDE.BNNODE(chainlux,
-    dataset = dataset2_2,
-    draw_samples = 1000,
-    l2std = [
-        0.01,
-        0.01,
-    ],
-    phystd = [
-        0.01,
-        0.01,
-    ],
-    priorsNNw = (0.0,
-        3.0),
-    param = [
-        LogNormal(1.5,
-            0.5),
-        LogNormal(1.2,
-            0.5),
-        LogNormal(3.3,
-            1),
-        LogNormal(1.4,
-            1)],
-    n_leapfrog = 30, progress = true)
-
-sol1flux11_new_all = solve(prob1, alg1)
-sol1flux11_new_all.estimated_ode_params[1] #|
-sol1flux22_new_all = solve(prob1, alg1)
-sol1flux22_new_all.estimated_ode_params[1] #|
-sol1flux33_new_all = solve(prob1, alg1)
-sol1flux33_new_all.estimated_ode_params[1] #|
-
-# 121 points 
-alg1 = NeuralPDE.BNNODE(chainlux,
-    dataset = dataset2_3,
-    draw_samples = 1000,
-    l2std = [
-        0.01,
-        0.01,
-    ],
-    phystd = [
-        0.01,
-        0.01,
-    ],
-    priorsNNw = (0.0,
-        3.0),
-    param = [
-        LogNormal(1.5,
-            0.5),
-        LogNormal(1.2,
-            0.5),
-        LogNormal(3.3,
-            1),
-        LogNormal(1.4,
-            1)],
-    n_leapfrog = 30, progress = true)
-
-sol1flux111_new_all = solve(prob1, alg1)
-sol1flux111_new_all.estimated_ode_params[1] #|
-sol1flux222_new_all = solve(prob1, alg1)
-sol1flux222_new_all.estimated_ode_params[1] #|
-sol1flux333_new_all = solve(prob1, alg1)
-sol1flux333_new_all.estimated_ode_params[1] #|
-# -------------------------------------------------------------------- 
-
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -640.4155412187399
-# L2loss2 Logpdf is : -757.9047847584478
-# Sampling 100%|███████████████████████████████| Time: 0:02:32
-
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -640.4155412187399
-# L2loss2 Logpdf is : -757.9047847584478
-# Sampling 100%|███████████████████████████████| Time: 0:02:19
-
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -640.4155412187399
-# L2loss2 Logpdf is : -757.9047847584478
-# Sampling 100%|███████████████████████████████| Time: 0:02:31
-
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -1198.9147562830894
-# L2loss2 Logpdf is : -1517.3653615845183
-# Sampling 100%|███████████████████████████████| Time: 0:03:45
-
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -1198.9147562830894
-# L2loss2 Logpdf is : -1517.3653615845183
-# Sampling 100%|███████████████████████████████| Time: 0:03:20
-
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -1198.9147562830894
-# L2loss2 Logpdf is : -1517.3653615845183
-# Sampling 100%|███████████████████████████████| Time: 0:03:20
-
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -2473.741390504424
-# L2loss2 Logpdf is : -3037.8868319811254
-# Sampling 100%|███████████████████████████████| Time: 0:04:57
-
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -2473.741390504424
-# L2loss2 Logpdf is : -3037.8868319811254
-# Sampling 100%|███████████████████████████████| Time: 0:05:26
-
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -2473.741390504424
-# L2loss2 Logpdf is : -3037.8868319811254
-# Sampling 100%|███████████████████████████████| Time: 0:05:01
-
-# ----------------------------------------------------------
-# Full likelihood  l2 + new L22(dataset gradients)
-# 25 points
-# 1*,2*,  
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux1_newdata_all = solve(prob, alg)
-sol2flux1_newdata_all.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} | 5.73072 Particles{Float64, 1}
-sol2flux2_newdata_all = solve(prob, alg)
-sol2flux2_newdata_all.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1} | 5.71597 Particles{Float64, 1}
-sol2flux3_newdata_all = solve(prob, alg)
-sol2flux3_newdata_all.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1} | 5.7313 Particles{Float64, 1}
-
-# 50 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux11_newdata_all = solve(prob, alg)
-sol2flux11_newdata_all.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1} | 6.07153 Particles{Float64, 1}
-sol2flux22_newdata_all = solve(prob, alg)
-sol2flux22_newdata_all.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1} | 6.06623 Particles{Float64, 1}
-sol2flux33_newdata_all = solve(prob, alg)
-sol2flux33_newdata_all.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} | 6.12748 Particles{Float64, 1}
-
-# 100 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux111_newdata_all = solve(prob, alg)
-sol2flux111_newdata_all.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1} | 6.26222 Particles{Float64, 1}
-sol2flux222_newdata_all = solve(prob, alg)
-sol2flux222_newdata_all.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1} | 5.86494 Particles{Float64, 1}
-sol2flux333_newdata_all = solve(prob, alg)
-sol2flux333_newdata_all.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1} |  
-
-# ---------------------------------------------------------------------------
-
-# LOTKA VOLTERRA CASE
-using Plots, StatsPlots
-function lotka_volterra(u, p, t)
-    # Model parameters.
-    α, β, γ, δ = p
-    # Current state.
-    x, y = u
-
-    # Evaluate differential equations.
-    dx = (α - β * y) * x # prey
-    dy = (δ * x - γ) * y # predator
-
-    return [dx, dy]
-end
-
-# initial-value problem.
-u01 = [1.0, 1.0]
-p1 = [1.5, 1.0, 3.0, 1.0]
-tspan1 = (0.0, 6.0)
-prob1 = ODEProblem(lotka_volterra, u01, tspan1, p1)
-
-chainlux = Lux.Chain(Lux.Dense(1, 6, Lux.tanh), Lux.Dense(6, 6, Lux.tanh), Lux.Dense(6, 2))
-
-#testing timepoints must match keyword arg `saveat`` timepoints of solve() call
-t1 = collect(Float64, prob.tspan[1]:(1 / 50.0):prob.tspan[2])
-
-# --------------------------------------------------------------------------
-# original paper implementaion
-# 25 points  
-solution1 = solve(prob1, Tsit5(); saveat = 0.2)
-time1 = solution1.t
-physsol1_1 = solution1.u
-u1 = hcat(solution1.u...)
-x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
-y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
-dataset2_1 = [x1, y1, time1]
-
-plot(time1, u1[1, :])
-plot!(time1, u1[2, :])
-scatter!(dataset2_1[3], dataset2_1[1])
-scatter!(dataset2_1[3], dataset2_1[2])
-
-alg1 = NeuralPDE.BNNODE(chainlux,
-    dataset = dataset2_1,
-    draw_samples = 1000,
-    l2std = [
-        0.01,
-        0.01,
-    ],
-    phystd = [
-        0.01,
-        0.01,
-    ],
-    priorsNNw = (0.0,
-        3.0),
-    param = [
-        LogNormal(1.5,
-            0.5),
-        LogNormal(1.2,
-            0.5),
-        LogNormal(3.3,
-            1),
-        LogNormal(1.4,
-            1)],
-    n_leapfrog = 30, progress = true)
-
-sol1flux1_normal = solve(prob1, alg1)
-sol1flux1_normal.estimated_ode_params[1]  #|
-sol1flux2_normal = solve(prob1, alg1)
-sol1flux2_normal.estimated_ode_params[1] #|
-sol1flux3_normal = solve(prob1, alg1)
-sol1flux3_normal.estimated_ode_params[1] #|
-
-# 50 points
-solution1 = solve(prob1, Tsit5(); saveat = 0.05)
-time1 = solution1.t
-physsol1_1 = solution1.u
-u1 = hcat(solution1.u...)
-x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
-y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
-dataset2_2 = [x1, y1, time1]
-
-alg1 = NeuralPDE.BNNODE(chainlux,
-    dataset = dataset2_2,
-    draw_samples = 1000,
-    l2std = [
-        0.01,
-        0.01,
-    ],
-    phystd = [
-        0.01,
-        0.01,
-    ],
-    priorsNNw = (0.0,
-        3.0),
-    param = [
-        LogNormal(1.5,
-            0.5),
-        LogNormal(1.2,
-            0.5),
-        LogNormal(3.3,
-            1),
-        LogNormal(1.4,
-            1)],
-    n_leapfrog = 30, progress = true)
-
-sol1flux11_normal = solve(prob1, alg1)
-sol1flux11_normal.estimated_ode_params[1] #|
-sol1flux22_normal = solve(prob1, alg1)
-sol1flux22_normal.estimated_ode_params[1] #|
-sol1flux33_normal = solve(prob1, alg1)
-sol1flux33_normal.estimated_ode_params[1] #|
-
-# 100 points
-solution = solve(prob1, Tsit5(); saveat = 0.05)
-time1 = solution1.t
-physsol1_1 = solution1.u
-u1 = hcat(solution1.u...)
-x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
-y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
-dataset2_3 = [x1, y1, time1]
-
-alg1 = NeuralPDE.BNNODE(chainlux,
-    dataset = dataset2_3,
-    draw_samples = 1000,
-    l2std = [
-        0.01,
-        0.01,
-    ],
-    phystd = [
-        0.01,
-        0.01,
-    ],
-    priorsNNw = (0.0,
-        3.0),
-    param = [
-        LogNormal(1.5,
-            0.5),
-        LogNormal(1.2,
-            0.5),
-        LogNormal(3.3,
-            1),
-        LogNormal(1.4,
-            1)],
-    n_leapfrog = 30, progress = true)
-
-sol1flux111_normal = solve(prob1, alg1)
-sol1flux111_normal.estimated_ode_params[1] #|
-sol1flux222_normal = solve(prob1, alg1)
-sol1flux222_normal.estimated_ode_params[1] #|
-sol1flux333_normal = solve(prob1, alg1)
-sol1flux333_normal.estimated_ode_params[1] #|
-
-# --------------------------------------------------------------------
-
-# ----------------------------------------------------------
-# Full likelihood no l2 only new L22(NN gradients)
-# 25 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux1_new = solve(prob, alg)
-sol2flux1_new.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} |
-sol2flux2_new = solve(prob, alg)
-sol2flux2_new.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1}   |
-sol2flux3_new = solve(prob, alg)
-sol2flux3_new.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1}  |
-
-# 50 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux11_new = solve(prob, alg)
-sol2flux11_new.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1}   |
-sol2flux22_new = solve(prob, alg)
-sol2flux22_new.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1}   |
-sol2flux33_new = solve(prob, alg)
-sol2flux33_new.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} |
-
-# 100 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux111_new = solve(prob, alg)
-sol2flux111_new.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1}   |
-sol2flux222_new = solve(prob, alg)
-sol2flux222_new.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1}     |
-sol2flux333_new = solve(prob, alg)
-sol2flux333_new.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1}   |
-# ---------------------------------------------------------------------------
-
-# ----------------------------------------------------------
-# Full likelihood  l2 + new L22(NN gradients)
-# 25 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux1_new_all = solve(prob, alg)
-sol2flux1_new_all.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1}  |
-sol2flux2_new_all = solve(prob, alg)
-sol2flux2_new_all.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1}    |
-sol2flux3_new_all = solve(prob, alg)
-sol2flux3_new_all.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1}   |
-
-# 50 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux11_new_all = solve(prob, alg)
-sol2flux11_new_all.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1}   |
-sol2flux22_new_all = solve(prob, alg)
-sol2flux22_new_all.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1}   |
-sol2flux33_new_all = solve(prob, alg)
-sol2flux33_new_all.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} |
-# 100 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol2flux111_new_all = solve(prob, alg)
-sol2flux111_new_all.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1}  |
-sol2flux222_new_all = solve(prob, alg)
-sol2flux222_new_all.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1}    |
-sol2flux333_new_all = solve(prob, alg)
-sol2flux333_new_all.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1}  |
-
-# ---------------------------------------------------------------------------
-
-# ----------------------------------------------------------
-# Full likelihood  l2 + new L22(dataset gradients)
-# 25 points 
-# *1,*2 vs *2.5
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol1flux1_newdata_all = solve(prob, alg)
-sol1flux1_newdata_all.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} |
-sol1flux2_newdata_all = solve(prob, alg)
-sol1flux2_newdata_all.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1}   |
-sol1flux3_newdata_all = solve(prob, alg)
-sol1flux3_newdata_all.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1}  |
-
-# 50 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol1flux11_newdata_all = solve(prob, alg)
-sol1flux11_newdata_all.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1}    |
-sol1flux22_newdata_all = solve(prob, alg)
-sol1flux22_newdata_all.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1}    |
-sol1flux33_newdata_all = solve(prob, alg)
-sol1flux33_newdata_all.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1}   |
-
-# 100 points 
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-sol1flux111_newdata_all = solve(prob, alg)
-sol1flux111_newdata_all.estimated_ode_params[1]  #|
-sol1flux222_newdata_all = solve(prob, alg)
-sol1flux222_newdata_all.estimated_ode_params[1]  #|
-sol1flux333_newdata_all = solve(prob, alg)
-sol1flux333_newdata_all.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1}   |
-
-# ------------------------------------------------------------------------------------------------------------------------------
-
-# sol2flux111.estimated_ode_params[1]
-# # mine *5
-# 7.03386Particles{Float64, 1}
-# # normal
-# 6.38951Particles{Float64, 1}
-# 6.67657Particles{Float64, 1}
-# # mine *10
-# 7.53672Particles{Float64, 1}
-# # mine *2
-# 6.29005Particles{Float64, 1}
-# 6.29844Particles{Float64, 1}
-
-# # new mine *2
-# 6.39008Particles{Float64, 1}
-# 6.22071Particles{Float64, 1}
-# 6.15611Particles{Float64, 1}
-
-# # new mine *2 tvdiff(smoothu, 20, 0.035, dx = dx, ε = 1e-2)
-# 6.25549Particles{Float64, 1}
-# ----------------------------------------------------------
-
-# ---------------------------------------------------
-
-function calculate_derivatives1(dataset)
-    x̂, time = dataset
-    num_points = length(x̂)
-    # Initialize an array to store the derivative values.
-    derivatives = similar(x̂)
-
-    for i in 2:(num_points - 1)
-        # Calculate the first-order derivative using central differences.
-        Δt_forward = time[i + 1] - time[i]
-        Δt_backward = time[i] - time[i - 1]
-
-        derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
-
-        derivatives[i] = derivative
-    end
-
-    # Derivatives at the endpoints can be calculated using forward or backward differences.
-    derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
-    derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
-    return derivatives
-end
-
-function calculate_derivatives2(dataset)
-    u = dataset[1]
-    t = dataset[2]
-    # control points
-    n = Int(floor(length(t) / 10))
-    # spline for datasetvalues(solution) 
-    # interp = BSplineApprox(u, t, 4, 10, :Uniform, :Uniform)
-    interp = CubicSpline(u, t)
-    # derrivatives interpolation
-    dx = t[2] - t[1]
-    time = collect(t[1]:dx:t[end])
-    smoothu = [interp(i) for i in time]
-    # derivative of the spline (must match function derivative) 
-    û = tvdiff(smoothu, 20, 0.03, dx = dx, ε = 1)
-    # tvdiff(smoothu, 100, 0.1, dx = dx)
-    # 
-    # 
-    # FDM
-    û1 = diff(u) / dx
-    # dataset[1] and smoothu are almost equal(rounding errors)
-    return û, time, smoothu, û1
-end
-
-# need to do this for all datasets
-c = [linear(prob.u0, p, t) for t in dataset3[2]] #ideal case
-b = calculate_derivatives1(dataset2) #central diffs
-# a = calculate_derivatives2(dataset) #tvdiff(smoothu, 100, 0.1, dx = dx)
-d = calculate_derivatives2(dataset1) #tvdiff(smoothu, 20, 0.035, dx = dx, ε = 1e-2)
-d = calculate_derivatives2(dataset2)
-d = calculate_derivatives2(dataset3)
-mean(abs2.(c .- b))
-mean(abs2.(c .- d[1]))
-loss(model, x, y) = mean(abs2.(model(x) .- y));
-scatter!(prob.u0 .+ (prob.tspan[2] .- dataset3[2]) .* chainflux1(dataset3[2]')')
-loss(chainflux1, dataset3[2]', dataset3[1]')
-# mean(abs2.(c[1:24] .- a[4]))
-plot(c, label = "ideal deriv")
-plot!(b, label = "Centraldiff deriv")
-# plot!(a[1], label = "tvdiff(0.1,def) derivatives") 
-plot!(d[1], label = "tvdiff(0.035,20) derivatives")
-plotly()
-
-# GridTraining , NoiseRobustDiff dataset[2][2]-dataset[2][1] l2std
-# 25 points 
-ta = range(tspan[1], tspan[2], length = 25)
-u = [linear_analytic(u0, p, ti) for ti in ta]
-x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-time = vec(collect(Float64, ta))
-dataset = [x̂, time]
-physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-time1 = collect(tspan[1]:(1 / 50.0):tspan[2])
-physsol = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
-plot(physsol, label = "solution")
-
-# plots from 32(deriv)
-# for d
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-    draw_samples = 2000, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-n2_sol2flux1 = solve(prob, alg)
-n2_sol2flux1.estimated_ode_params[1]
-# with extra likelihood 
-# 10.2011Particles{Float64, 1}
-
-# without extra likelihood 
-# 6.25791Particles{Float64, 1}
-# 6.29539Particles{Float64, 1}
-
-plot!(n2_sol2flux1.ensemblesol[1], label = "tvdiff(0.035,1) derivpar")
-plot(dataset[1])
-plot!(physsol1)
-# for a
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-    draw_samples = 2000, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-n2_sol2flux2 = solve(prob, alg)
-n2_sol2flux2.estimated_ode_params[1]
-# with extra likelihood
-# 8.73602Particles{Float64, 1}
-# without extra likelihood
-
-plot!(n2_sol2flux2.ensemblesol[1],
-    label = "tvdiff(0.1,def) derivatives",
-    legend = :outerbottomleft)
-
-# for b
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-    draw_samples = 2000, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-n2_sol2flux3 = solve(prob, alg)
-n2_sol2flux3.estimated_ode_params[1]
-plot!(n2_sol2flux3.ensemblesol[1], label = "Centraldiff deriv")
-
-# for c
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-    draw_samples = 2000, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-n2_sol2flux4 = solve(prob, alg)
-n2_sol2flux4.estimated_ode_params[1]
-plot!(n2_sol2flux4.ensemblesol[1], label = "ideal deriv")
-
-# 50 points 
-
-ta = range(tspan[1], tspan[2], length = 50)
-u = [linear_analytic(u0, p, ti) for ti in ta]
-x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-time = vec(collect(Float64, ta))
-dataset = [x̂, time]
-physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-n2_sol2flux11 = solve(prob, alg)
-n2_sol2flux11.estimated_ode_params[1]
-
-# 5.90049Particles{Float64, 1}
-# 100 points
-ta = range(tspan[1], tspan[2], length = 100)
-u = [linear_analytic(u0, p, ti) for ti in ta]
-x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-time = vec(collect(Float64, ta))
-dataset = [x̂, time]
-physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-    draw_samples = 1500, physdt = 1 / 50.0f0,
-    priorsNNw = (0.0, 3.0),
-    param = [LogNormal(9, 0.5)],
-    Metric = DiagEuclideanMetric,
-    n_leapfrog = 30, progress = true)
-
-n2_sol2flux111 = solve(prob, alg)
-n2_sol2flux111.estimated_ode_params[1]
-plot!(n2_sol2flux111.ensemblesol[1])
-8.88555Particles{Float64, 1}
-
-# 7.15353Particles{Float64, 1}
-# 6.21059 Particles{Float64, 1}
-# 6.31836Particles{Float64, 1}
-0.1 * p
-# ----------------------------------------------------------
-
-# Gives the linear interpolation value at t=3.5
-
-# # Problem 1 with param esimation
-# # dataset 0-1 2 percent noise
-# p = 6.283185307179586
-# # partial_logdensity
-# 6.3549Particles{Float64, 1}
-# # full log_density
-# 6.34667Particles{Float64, 1}
-
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2lux.estimated_ode_params[1]
-
-# # dataset 0-1 20 percent noise
-# # partial log_density
-# 6.30244Particles{Float64, 1}
-# # full log_density
-# 6.24637Particles{Float64, 1}
-
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# # dataset 0-2 20percent noise
-# # partial log_density
-# 6.24948Particles{Float64, 1}
-# # full log_density
-# 6.26095Particles{Float64, 1}
-
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# linear_analytic = (u0, p, t) -> u0 + sin(p * t) / (p)
-# linear = (u, p, t) -> cos(p * t)
-# tspan = (0.0, 2.0)
-
-# # dataset 0-1 2 percent noise
-# p = 6.283185307179586
-# # partial_logdensity
-# 6.3549Particles{Float64, 1}
-# # full log_density
-# 6.34667Particles{Float64, 1}
-
-# # dataset 0-1 20 percent noise
-# # partial log_density
-# 6.30244Particles{Float64, 1}
-# # full log_density
-# 6.24637Particles{Float64, 1}
-
-# # dataset 0-2 20percent noise
-# # partial log_density
-# 6.24948Particles{Float64, 1}
-# # full log_density
-# 6.26095Particles{Float64, 1}
-
-# # dataset 0-2 20percent noise 50 points(above all are 100 points)
-# # FuLL log_density
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# # partial log_density
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# # i kinda win on 25 points again
-# # dataset 0-2 20percent noise 25 points
-# # FuLL log_density
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# # partial log_density
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# # i win with 25 points
-# # dataset 0-1 20percent noise 25 points
-# # FuLL log_density
-# sol2flux.estimated_ode_params[1]
-# # new
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# # partial log_density
-# sol2flux.estimated_ode_params[1]
-# # New
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# # (9,2.5)(above are (9,0.5))
-# # FuLL log_density
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# # just prev was repeat(just change)
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# # partial log_density
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# # i lose on 0-1,50 points
-# # dataset 0-1 20percent noise 50 points
-# # FuLL log_density
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-# # partial log_density
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# # (9,2.5) (above are (9,0.5))
-# # FuLL log_density
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# # partial log_density
-# sol2flux.estimated_ode_params[1]
-# sol2flux.estimated_ode_params[1]
-
-# # ----------------------------------------------------------
-# # Problem 1 with param estimation
-# # physdt=1/20, Full likelihood new 0.5*l2std
-# # 25 points
-# ta = range(tspan[1], tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n05_sol2flux1 = solve(prob, alg)
-# n05_sol2flux1.estimated_ode_params[1] #6.90953 Particles{Float64, 1}
-# n05_sol2flux2 = solve(prob, alg)
-# n05_sol2flux2.estimated_ode_params[1] #6.82374 Particles{Float64, 1}
-# n05_sol2flux3 = solve(prob, alg)
-# n05_sol2flux3.estimated_ode_params[1] #6.84465 Particles{Float64, 1}
-
-# using Plots, StatsPlots
-# plot(n05_sol2flux3.ensemblesol[1])
-# plot!(physsol1)
-# # 50 points
-# ta = range(tspan[1], tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n05_sol2flux11 = solve(prob, alg)
-# n05_sol2flux11.estimated_ode_params[1] #7.0262 Particles{Float64, 1}
-# n05_sol2flux22 = solve(prob, alg)
-# n05_sol2flux22.estimated_ode_params[1] #5.56438 Particles{Float64, 1}
-# n05_sol2flux33 = solve(prob, alg)
-# n05_sol2flux33.estimated_ode_params[1] #7.27189 Particles{Float64, 1}
-
-# # 100 points
-# ta = range(tspan[1], tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n05_sol2flux111 = solve(prob, alg)
-# n05_sol2flux111.estimated_ode_params[1] #6.90549 Particles{Float64, 1}
-# n05_sol2flux222 = solve(prob, alg)
-# n05_sol2flux222.estimated_ode_params[1] #5.42436 Particles{Float64, 1}
-# n05_sol2flux333 = solve(prob, alg)
-# n05_sol2flux333.estimated_ode_params[1] #6.05832 Particles{Float64, 1}
-
-# # ----------------------------------------------------------
-# # physdt=1/20, Full likelihood new 2*l2std
-# # 25 points
-# ta = range(tspan[1], tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n2_sol2flux1 = solve(prob, alg)
-# n2_sol2flux1.estimated_ode_params[1]#6.9087 Particles{Float64, 1}
-# n2_sol2flux2 = solve(prob, alg)
-# n2_sol2flux2.estimated_ode_params[1]#6.86507 Particles{Float64, 1}
-# n2_sol2flux3 = solve(prob, alg)
-# n2_sol2flux3.estimated_ode_params[1]#6.59206 Particles{Float64, 1}
-
-# # 50 points
-# ta = range(tspan[1], tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n2_sol2flux11 = solve(prob, alg)
-# n2_sol2flux11.estimated_ode_params[1]#7.3715 Particles{Float64, 1}
-# n2_sol2flux22 = solve(prob, alg)
-# n2_sol2flux22.estimated_ode_params[1]#9.84477 Particles{Float64, 1}
-# n2_sol2flux33 = solve(prob, alg)
-# n2_sol2flux33.estimated_ode_params[1]#6.87107 Particles{Float64, 1}
-
-# # 100 points
-# ta = range(tspan[1], tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n2_sol2flux111 = solve(prob, alg)
-# n2_sol2flux111.estimated_ode_params[1]#6.60739 Particles{Float64, 1}
-# n2_sol2flux222 = solve(prob, alg)
-# n2_sol2flux222.estimated_ode_params[1]#7.05923 Particles{Float64, 1}
-# n2_sol2flux333 = solve(prob, alg)
-# n2_sol2flux333.estimated_ode_params[1]#6.5017 Particles{Float64, 1}
-
-# # ----------------------------------------------------------
-
-# # ----------------------------------------------------------
-# # physdt=1/20, Full likelihood new all 2*l2std
-# # 25 points
-# ta = range(tspan[1], tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n2all5sol2flux1 = solve(prob, alg)
-# n2all5sol2flux1.estimated_ode_params[1]#11.3659 Particles{Float64, 1}
-# n2all5sol2flux2 = solve(prob, alg)
-# n2all5sol2flux2.estimated_ode_params[1]#6.65634 Particles{Float64, 1}
-# n2all5sol2flux3 = solve(prob, alg)
-# n2all5sol2flux3.estimated_ode_params[1]#6.61905 Particles{Float64, 1}
-
-# # 50 points
-# ta = range(tspan[1], tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n2all5sol2flux11 = solve(prob, alg)
-# n2all5sol2flux11.estimated_ode_params[1]#6.27555 Particles{Float64, 1}
-# n2all5sol2flux22 = solve(prob, alg)
-# n2all5sol2flux22.estimated_ode_params[1]#6.24352 Particles{Float64, 1}
-# n2all5sol2flux33 = solve(prob, alg)
-# n2all5sol2flux33.estimated_ode_params[1]#6.33723 Particles{Float64, 1}
-
-# # 100 points
-# ta = range(tspan[1], tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n2all5sol2flux111 = solve(prob, alg)
-# n2all5sol2flux111.estimated_ode_params[1] #5.95535 Particles{Float64, 1}
-# n2all5sol2flux222 = solve(prob, alg)
-# n2all5sol2flux222.estimated_ode_params[1] #5.98301 Particles{Float64, 1}
-# n2all5sol2flux333 = solve(prob, alg)
-# n2all5sol2flux333.estimated_ode_params[1] #5.9081 Particles{Float64, 1}
-
-# # ----------------------------------------------------------
-# # physdt=1/20, Full likelihood new all (l2+l22)
-# # 25 points
-# ta = range(tspan[1], tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# nall5sol2flux1 = solve(prob, alg)
-# nall5sol2flux1.estimated_ode_params[1]#6.54705 Particles{Float64, 1}
-# nall5sol2flux2 = solve(prob, alg)
-# nall5sol2flux2.estimated_ode_params[1]#6.6967 Particles{Float64, 1}
-# nall5sol2flux3 = solve(prob, alg)
-# nall5sol2flux3.estimated_ode_params[1]#6.47173 Particles{Float64, 1}
-
-# # 50 points
-# ta = range(tspan[1], tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# nall5sol2flux11 = solve(prob, alg)
-# nall5sol2flux11.estimated_ode_params[1]#6.2113 Particles{Float64, 1}
-# nall5sol2flux22 = solve(prob, alg)
-# nall5sol2flux22.estimated_ode_params[1]#6.10675 Particles{Float64, 1}
-# nall5sol2flux33 = solve(prob, alg)
-# nall5sol2flux33.estimated_ode_params[1]#6.11541 Particles{Float64, 1}
-
-# # 100 points
-# ta = range(tspan[1], tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# nall5sol2flux111 = solve(prob, alg)
-# nall5sol2flux111.estimated_ode_params[1]#6.35224 Particles{Float64, 1}
-# nall5sol2flux222 = solve(prob, alg)
-# nall5sol2flux222.estimated_ode_params[1]#6.40542 Particles{Float64, 1}
-# nall5sol2flux333 = solve(prob, alg)
-# nall5sol2flux333.estimated_ode_params[1]#6.44206 Particles{Float64, 1}
-
-# # ----------------------------------------------------------
-# # physdt=1/20, Full likelihood new 5* (new only l22 mod)
-# # 25 points
-# ta = range(tspan[1], tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n5sol2flux1 = solve(prob, alg)
-# n5sol2flux1.estimated_ode_params[1]#7.05077 Particles{Float64, 1}
-# n5sol2flux2 = solve(prob, alg)
-# n5sol2flux2.estimated_ode_params[1]#7.07303 Particles{Float64, 1}
-# n5sol2flux3 = solve(prob, alg)
-# n5sol2flux3.estimated_ode_params[1]#5.10622 Particles{Float64, 1}
-
-# # 50 points
-# ta = range(tspan[1], tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n5sol2flux11 = solve(prob, alg)
-# n5sol2flux11.estimated_ode_params[1]#7.39852 Particles{Float64, 1}
-# n5sol2flux22 = solve(prob, alg)
-# n5sol2flux22.estimated_ode_params[1]#7.30319 Particles{Float64, 1}
-# n5sol2flux33 = solve(prob, alg)
-# n5sol2flux33.estimated_ode_params[1]#6.73722 Particles{Float64, 1}
-
-# # 100 points
-# ta = range(tspan[1], tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# n5sol2flux111 = solve(prob, alg)
-# n5sol2flux111.estimated_ode_params[1]#7.15996 Particles{Float64, 1}
-# n5sol2flux222 = solve(prob, alg)
-# n5sol2flux222.estimated_ode_params[1]#7.02949 Particles{Float64, 1}
-# n5sol2flux333 = solve(prob, alg)
-# n5sol2flux333.estimated_ode_params[1]#6.9393 Particles{Float64, 1}
-
-# # ----------------------------------------------------------
-# # physdt=1/20, Full likelihood new
-# # 25 points
-# ta = range(tspan[1], tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# nsol2flux1 = solve(prob, alg)
-# nsol2flux1.estimated_ode_params[1] #5.82707 Particles{Float64, 1}
-# nsol2flux2 = solve(prob, alg)
-# nsol2flux2.estimated_ode_params[1] #4.81534 Particles{Float64, 1}
-# nsol2flux3 = solve(prob, alg)
-# nsol2flux3.estimated_ode_params[1] #5.52965 Particles{Float64, 1}
-
-# # 50 points
-# ta = range(tspan[1], tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# nsol2flux11 = solve(prob, alg)
-# nsol2flux11.estimated_ode_params[1] #7.04027 Particles{Float64, 1}
-# nsol2flux22 = solve(prob, alg)
-# nsol2flux22.estimated_ode_params[1] #7.17588 Particles{Float64, 1}
-# nsol2flux33 = solve(prob, alg)
-# nsol2flux33.estimated_ode_params[1] #6.94495 Particles{Float64, 1}
-
-# # 100 points
-# ta = range(tspan[1], tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# nsol2flux111 = solve(prob, alg)
-# nsol2flux111.estimated_ode_params[1] #6.06608 Particles{Float64, 1}
-# nsol2flux222 = solve(prob, alg)
-# nsol2flux222.estimated_ode_params[1] #6.84726 Particles{Float64, 1}
-# nsol2flux333 = solve(prob, alg)
-# nsol2flux333.estimated_ode_params[1] #6.83463 Particles{Float64, 1}
-
-# # ----------------------------------------------------------
-
-# # ----------------------------------------------------------
-# # physdt=1/20, Full likelihood
-# # 25 points
-# ta = range(tspan[1], tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux1 = solve(prob, alg)
-# sol2flux1.estimated_ode_params[1] #6.71397 Particles{Float64, 1} 6.37604 Particles{Float64, 1}
-# sol2flux2 = solve(prob, alg)
-# sol2flux2.estimated_ode_params[1] #6.73509 Particles{Float64, 1} 6.21692 Particles{Float64, 1}
-# sol2flux3 = solve(prob, alg)
-# sol2flux3.estimated_ode_params[1] #6.65453 Particles{Float64, 1} 6.23153 Particles{Float64, 1}
-
-# # 50 points
-# ta = range(tspan[1], tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux11 = solve(prob, alg)
-# sol2flux11.estimated_ode_params[1] #6.23443 Particles{Float64, 1} 6.30635 Particles{Float64, 1}
-# sol2flux22 = solve(prob, alg)
-# sol2flux22.estimated_ode_params[1] #6.18879 Particles{Float64, 1} 6.30099 Particles{Float64, 1}
-# sol2flux33 = solve(prob, alg)
-# sol2flux33.estimated_ode_params[1] #6.22773 Particles{Float64, 1} 6.30671 Particles{Float64, 1}
-
-# # 100 points
-# ta = range(tspan[1], tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux111 = solve(prob, alg)
-# sol2flux111.estimated_ode_params[1] #6.15832 Particles{Float64, 1} 6.35453 Particles{Float64, 1}
-# sol2flux222 = solve(prob, alg)
-# sol2flux222.estimated_ode_params[1] #6.16968 Particles{Float64, 1}6.31125 Particles{Float64, 1}
-# sol2flux333 = solve(prob, alg)
-# sol2flux333.estimated_ode_params[1] #6.12466 Particles{Float64, 1} 6.26514 Particles{Float64, 1}
-
-# # ----------------------------------------------------------
-
-# # ----------------------------------------------------------
-# # physdt=1/20, partial likelihood
-# # 25 points
-# ta = range(tspan[1], tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux1_p = solve(prob, alg)
-# sol2flux1_p.estimated_ode_params[1] #5.74065 Particles{Float64, 1} #6.83683 Particles{Float64, 1}
-# sol2flux2_p = solve(prob, alg)
-# sol2flux2_p.estimated_ode_params[1] #9.82504 Particles{Float64, 1} #6.14568 Particles{Float64, 1}
-# sol2flux3_p = solve(prob, alg)
-# sol2flux3_p.estimated_ode_params[1] #5.75075 Particles{Float64, 1} #6.08579 Particles{Float64, 1}
-
-# # 50 points
-# ta = range(tspan[1], tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux11_p = solve(prob, alg)
-# sol2flux11_p.estimated_ode_params[1] #6.19414 Particles{Float64, 1} #6.04621 Particles{Float64, 1}
-# sol2flux22_p = solve(prob, alg)
-# sol2flux22_p.estimated_ode_params[1] #6.15227 Particles{Float64, 1} #6.29086 Particles{Float64, 1}
-# sol2flux33_p = solve(prob, alg)
-# sol2flux33_p.estimated_ode_params[1] #6.19048 Particles{Float64, 1} #6.12516 Particles{Float64, 1}
-
-# # 100 points
-# ta = range(tspan[1], tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol2flux111_p = solve(prob, alg)
-# sol2flux111_p.estimated_ode_params[1] #6.51608 Particles{Float64, 1}# 6.42945Particles{Float64, 1}
-# sol2flux222_p = solve(prob, alg)
-# sol2flux222_p.estimated_ode_params[1] #6.4875 Particles{Float64, 1} # 6.44524Particles{Float64, 1}
-# sol2flux333_p = solve(prob, alg)
-# sol2flux333_p.estimated_ode_params[1] #6.51679 Particles{Float64, 1}# 6.43152Particles{Float64, 1}
-
-# # ---------------------------------------------------
-
-# # ----------------------------------------------------------
-# # physdt=1/20, Full likelihood, dataset(1.0-2.0)
-# # 25 points
-# ta = range(1.0, tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1flux1 = solve(prob, alg)
-# sol1flux1.estimated_ode_params[1] #6.35164 Particles{Float64, 1}
-# sol1flux2 = solve(prob, alg)
-# sol1flux2.estimated_ode_params[1] #6.30919 Particles{Float64, 1}
-# sol1flux3 = solve(prob, alg)
-# sol1flux3.estimated_ode_params[1] #6.33554 Particles{Float64, 1}
-
-# # 50 points
-# ta = range(1.0, tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1flux11 = solve(prob, alg)
-# sol1flux11.estimated_ode_params[1] #6.39769 Particles{Float64, 1}
-# sol1flux22 = solve(prob, alg)
-# sol1flux22.estimated_ode_params[1] #6.43924 Particles{Float64, 1}
-# sol1flux33 = solve(prob, alg)
-# sol1flux33.estimated_ode_params[1] #6.4697 Particles{Float64, 1}
-
-# # 100 points
-# ta = range(1.0, tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1flux111 = solve(prob, alg)
-# sol1flux111.estimated_ode_params[1] #6.27812 Particles{Float64, 1}
-# sol1flux222 = solve(prob, alg)
-# sol1flux222.estimated_ode_params[1] #6.19278 Particles{Float64, 1}
-# sol1flux333 = solve(prob, alg)
-# sol1flux333.estimated_ode_params[1] # 9.68244Particles{Float64, 1} (first try) # 6.23969 Particles{Float64, 1}(second try)
-
-# # ----------------------------------------------------------
-
-# # ----------------------------------------------------------
-# # physdt=1/20, partial likelihood, dataset(1.0-2.0)
-# # 25 points
-# ta = range(1.0, tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1flux1_p = solve(prob, alg)
-# sol1flux1_p.estimated_ode_params[1]#6.36269 Particles{Float64, 1}
-
-# sol1flux2_p = solve(prob, alg)
-# sol1flux2_p.estimated_ode_params[1]#6.34685 Particles{Float64, 1}
-
-# sol1flux3_p = solve(prob, alg)
-# sol1flux3_p.estimated_ode_params[1]#6.31421 Particles{Float64, 1}
-
-# # 50 points
-# ta = range(1.0, tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1flux11_p = solve(prob, alg)
-# sol1flux11_p.estimated_ode_params[1] #6.15725 Particles{Float64, 1}
-
-# sol1flux22_p = solve(prob, alg)
-# sol1flux22_p.estimated_ode_params[1] #6.18145 Particles{Float64, 1}
-
-# sol1flux33_p = solve(prob, alg)
-# sol1flux33_p.estimated_ode_params[1] #6.21905 Particles{Float64, 1}
-
-# # 100 points
-# ta = range(1.0, tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1flux111_p = solve(prob, alg)
-# sol1flux111_p.estimated_ode_params[1]#6.13481 Particles{Float64, 1}
-
-# sol1flux222_p = solve(prob, alg)
-# sol1flux222_p.estimated_ode_params[1]#9.68555 Particles{Float64, 1}
-
-# sol1flux333_p = solve(prob, alg)
-# sol1flux333_p.estimated_ode_params[1]#6.1477 Particles{Float64, 1}
-
-# # -----------------------------------------------------------
-
-# # ----------------------------------------------------------
-# # physdt=1/20, partial likelihood, dataset(1-2), again but different density
-# # 12 points
-# ta = range(1.0, tspan[2], length = 12)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol3flux1_p = solve(prob, alg)
-# sol3flux1_p.estimated_ode_params[1]#6.50048 Particles{Float64, 1}
-# sol3flux2_p = solve(prob, alg)
-# sol3flux2_p.estimated_ode_params[1]#6.57597 Particles{Float64, 1}
-# sol3flux3_p = solve(prob, alg)
-# sol3flux3_p.estimated_ode_params[1]#6.24487 Particles{Float64, 1}
-
-# # 25 points
-# ta = range(1.0, tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol3flux11_p = solve(prob, alg)
-# sol3flux11_p.estimated_ode_params[1]#6.53093 Particles{Float64, 1}
-
-# sol3flux22_p = solve(prob, alg)
-# sol3flux22_p.estimated_ode_params[1]#6.32744 Particles{Float64, 1}
-
-# sol3flux33_p = solve(prob, alg)
-# sol3flux33_p.estimated_ode_params[1]#6.49175 Particles{Float64, 1}
-
-# # 50 points
-# ta = range(1.0, tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol3flux111_p = solve(prob, alg)
-# sol3flux111_p.estimated_ode_params[1]#6.4455 Particles{Float64, 1}
-# sol3flux222_p = solve(prob, alg)
-# sol3flux222_p.estimated_ode_params[1]#6.40736 Particles{Float64, 1}
-# sol3flux333_p = solve(prob, alg)
-# sol3flux333_p.estimated_ode_params[1]#6.46214 Particles{Float64, 1}
-
-# # ---------------------------------------------------
-
-# # ----------------------------------------------------------
-# # physdt=1/20, partial likelihood, dataset(0-1)
-# # 25 points
-# ta = range(tspan[1], 1.0, length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol0flux1_p = solve(prob, alg)
-# sol0flux1_p.estimated_ode_params[1]#7.12625 Particles{Float64, 1}
-# sol0flux2_p = solve(prob, alg)
-# sol0flux2_p.estimated_ode_params[1]#8.40948 Particles{Float64, 1}
-# sol0flux3_p = solve(prob, alg)
-# sol0flux3_p.estimated_ode_params[1]#7.18768 Particles{Float64, 1}
-
-# # 50 points
-# ta = range(tspan[1], 1.0, length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol0flux11_p = solve(prob, alg)
-# sol0flux11_p.estimated_ode_params[1]#6.23707 Particles{Float64, 1}
-# sol0flux22_p = solve(prob, alg)
-# sol0flux22_p.estimated_ode_params[1]#6.09728 Particles{Float64, 1}
-# sol0flux33_p = solve(prob, alg)
-# sol0flux33_p.estimated_ode_params[1]#6.12971 Particles{Float64, 1}
-
-# # 100 points
-# ta = range(tspan[1], 1.0, length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [LogNormal(9, 0.5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol0flux111_p = solve(prob, alg)
-# sol0flux111_p.estimated_ode_params[1]#5.99039 Particles{Float64, 1}
-# sol0flux222_p = solve(prob, alg)
-# sol0flux222_p.estimated_ode_params[1]#5.89609 Particles{Float64, 1}
-# sol0flux333_p = solve(prob, alg)
-# sol0flux333_p.estimated_ode_params[1]#5.91923 Particles{Float64, 1}
-
-# # ---------------------------------------------------
-
-# # ----------------------------------------------------------
-# # physdt=1/20, Full likelihood, dataset(1.0-2.0), Normal(12,5) distri prior
-# # 25 points
-# ta = range(1.0, tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 6.0),
-#     param = [Normal(12, 5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1f1 = solve(prob, alg)
-# sol1f1.estimated_ode_params[1]
-# # 10.9818Particles{Float64, 1}
-# sol1f2 = solve(prob, alg)
-# sol1f2.estimated_ode_params[1]
-# # sol1f3 = solve(prob, alg)
-# # sol1f3.estimated_ode_params[1]
-
-# # 50 points
-# ta = range(1.0, tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 6.0),
-#     param = [Normal(12, 5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1f11 = solve(prob, alg)
-# sol1f11.estimated_ode_params[1]
-# sol1f22 = solve(prob, alg)
-# sol1f22.estimated_ode_params[1]
-# # sol1f33 = solve(prob, alg)
-# # sol1f33.estimated_ode_params[1]
-
-# # 100 points
-# ta = range(1.0, tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 6.0),
-#     param = [Normal(12, 5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1f111 = solve(prob, alg)
-# sol1f111.estimated_ode_params[1]
-# sol1f222 = solve(prob, alg)
-# sol1f222.estimated_ode_params[1]
-# # sol1f333 = solve(prob, alg)
-# # sol1f333.estimated_ode_params[1]
-
-# # ----------------------------------------------------------
-
-# # ----------------------------------------------------------
-# # physdt=1/20, partial likelihood, dataset(1.0-2.0), Normal(12,5) distri prior
-# # 25 points
-# ta = range(1.0, tspan[2], length = 25)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [Normal(12, 5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1f1_p = solve(prob, alg)
-# sol1f1_p.estimated_ode_params[1]
-# sol1f2_p = solve(prob, alg)
-# sol1f2_p.estimated_ode_params[1]
-# sol1f3_p = solve(prob, alg)
-# sol1f3_p.estimated_ode_params[1]
-
-# # 50 points
-# ta = range(1.0, tspan[2], length = 50)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [Normal(12, 5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1f11_p = solve(prob, alg)
-# sol1f11_p.estimated_ode_params[1]
-# sol1f22_p = solve(prob, alg)
-# sol1f22_p.estimated_ode_params[1]
-# sol1f33_p = solve(prob, alg)
-# sol1f33_p.estimated_ode_params[1]
-
-# # 100 points
-# ta = range(1.0, tspan[2], length = 100)
-# u = [linear_analytic(u0, p, ti) for ti in ta]
-# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂, time]
-# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
-
-# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
-#     draw_samples = 1500, physdt = 1 / 50.0f0,
-#     priorsNNw = (0.0, 3.0),
-#     param = [Normal(12, 5)],
-#     Metric = DiagEuclideanMetric,
-#     n_leapfrog = 30, progress = true)
-
-# sol1f111_p = solve(prob, alg)
-# sol1f111_p.estimated_ode_params[1]
-# sol1f222_p = solve(prob, alg)
-# sol1f222_p.estimated_ode_params[1]
-# sol1f333_p = solve(prob, alg)
-# sol1f333_p.estimated_ode_params[1]
-
-# # ----------------------------------------------------------
-
-# plot!(title = "9,2.5 50 training 2>full,1>partial")
-
-# p
-# param1
-# # (lux chain)
-# @prob mean(abs.(physsol2 .- sol3lux_pestim.ensemblesol[1])) < 8e-2
-
-# # estimated parameters(lux chain)
-# param1 = sol3lux_pestim.estimated_ode_params[1]
-# @test abs(param1 - p) < abs(0.35 * p)
-
-# p
-# param1
-
-# # # my suggested Loss likelihood part
-# # #  + L2loss2(Tar, θ)
-# # # My suggested extra loss function
-# # function L2loss2(Tar::LogTargetDensity, θ)
-# #     f = Tar.prob.f
-
-# #     # parameter estimation chosen or not
-# #     if Tar.extraparams > 0
-# #         dataset = Tar.dataset
-
-# #         # Timepoints to enforce Physics
-# #         dataset = Array(reduce(hcat, dataset)')
-# #         t = dataset[end, :]
-# #         û = dataset[1:(end - 1), :]
-
-# #         ode_params = Tar.extraparams == 1 ?
-# #                      θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
-# #                      θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
-
-# #         if length(û[:, 1]) == 1
-# #             physsol = [f(û[:, i][1],
-# #                 ode_params,
-# #                 t[i])
-# #                        for i in 1:length(û[1, :])]
-# #         else
-# #             physsol = [f(û[:, i],
-# #                 ode_params,
-# #                 t[i])
-# #                        for i in 1:length(û[1, :])]
-# #         end
-# #         #form of NN output matrix output dim x n
-# #         deri_physsol = reduce(hcat, physsol)
-
-# #         #   OG deriv(basically gradient matching in case of an ODEFunction)
-# #         # in case of PDE or general ODE we would want to reduce residue of f(du,u,p,t)
-# #         # if length(û[:, 1]) == 1
-# #         #     deri_sol = [f(û[:, i][1],
-# #         #         Tar.prob.p,
-# #         #         t[i])
-# #         #                 for i in 1:length(û[1, :])]
-# #         # else
-# #         #     deri_sol = [f(û[:, i],
-# #         #         Tar.prob.p,
-# #         #         t[i])
-# #         #                 for i in 1:length(û[1, :])]
-# #         # end
-# #         # deri_sol = reduce(hcat, deri_sol)
-# #         derivatives = calculate_derivatives(Tar.dataset)
-# #         deri_sol = reduce(hcat, derivatives)
-
-# #         physlogprob = 0
-# #         for i in 1:length(Tar.prob.u0)
-# #             # can add phystd[i] for u[i]
-# #             physlogprob += logpdf(MvNormal(deri_physsol[i, :],
-# #                     LinearAlgebra.Diagonal(map(abs2,
-# #                         Tar.l2std[i] .*
-# #                         ones(length(deri_sol[i, :]))))),
-# #                 deri_sol[i, :])
-# #         end
-# #         return physlogprob
-# #     else
-# #         return 0
-# #     end
-# # end
-
-# # function calculate_derivatives(dataset)
-# #     x̂, time = dataset
-# #     num_points = length(x̂)
-
-# #     # Initialize an array to store the derivative values.
-# #     derivatives = similar(x̂)
-
-# #     for i in 2:(num_points - 1)
-# #         # Calculate the first-order derivative using central differences.
-# #         Δt_forward = time[i + 1] - time[i]
-# #         Δt_backward = time[i] - time[i - 1]
-
-# #         derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
-
-# #         derivatives[i] = derivative
-# #     end
-
-# #     # Derivatives at the endpoints can be calculated using forward or backward differences.
-# #     derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
-# #     derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
-
-# #     return derivatives
-# # end
-
-# size(dataset[1])
-# # Problem 1 with param estimation(flux,lux)
-# # Normal
-# # 6.20311 Particles{Float64, 1},6.21746Particles{Float64, 1}
-# # better
-# # 6.29093Particles{Float64, 1}, 6.27925Particles{Float64, 1}
-# # Non ideal case
-# # 6.14861Particles{Float64, 1}, 
-# sol2flux.estimated_ode_params
-# sol2lux.estimated_ode_params[1]
-# p
-# size(sol3flux_pestim.ensemblesol[2])
-# plott = sol3flux_pestim.ensemblesol[1]
-# using StatsPlots
-# plotly()
-# plot(t, sol3flux_pestim.ensemblesol[1])
-
-# function calculate_derivatives(dataset)
-#     x̂, time = dataset
-#     num_points = length(x̂)
-
-#     # Initialize an array to store the derivative values.
-#     derivatives = similar(x̂)
-
-#     for i in 2:(num_points - 1)
-#         # Calculate the first-order derivative using central differences.
-#         Δt_forward = time[i + 1] - time[i]
-#         Δt_backward = time[i] - time[i - 1]
-
-#         derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
-
-#         derivatives[i] = derivative
-#     end
-
-#     # Derivatives at the endpoints can be calculated using forward or backward differences.
-#     derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
-#     derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
-
-#     return derivatives
-# end
-
-# # Example usage:
-# # dataset = [x̂, time]
-# derivatives = calculate_derivatives(dataset)
-# dataset[1]
-# # Access derivative values at specific time points as needed.
-
-# # # 9,0.5
-# # 0.09894916260292887
-# # 0.09870335436072103
-# # 0.08398556878067913
-# # 0.10109070099105527
-# # 0.09122683737517055
-# # 0.08614958011892977
-# # mean(abs.(x̂ .- meanscurve1)) #0.017112298305523976
-# # mean(abs.(physsol1 .- meanscurve1)) #0.004038636894341354
-# # # 9,4(little worse)
-# # mean(abs.(x̂ .- meanscurve1))#0.01800876370000113
-# # mean(abs.(physsol1 .- meanscurve1))#0.007285681280600875
-# # # 30,30
-# # mean(abs.(x̂ .- meanscurve1)) #0.10599926120358046
-# # mean(abs.(physsol1 .- meanscurve1)) #0.10375554193397989
-# # # 30,0.5
-# # mean(abs.(x̂ .- meanscurve1)) #0.10160824458252521
-# # mean(abs.(physsol1 .- meanscurve1)) #0.09999942538357891
-
-# # # ------------------------------------------------normale
-# # # 9,0.5
-# # mean(abs.(x̂ .- meanscurve1)) #0.0333356493928835
-# # mean(abs.(physsol1 .- meanscurve1)) #0.02721733876400459
-# # # 9,4(little worse)
-# # mean(abs.(x̂ .- meanscurve1)) #0.020734206709433347
-# # mean(abs.(physsol1 .- meanscurve1)) #0.012502850740700212
-# # # 30,30
-# # mean(abs.(x̂ .- meanscurve1)) #0.10615859683094729
-# # mean(abs.(physsol1 .- meanscurve1)) #0.10508141153722575
-# # # 30,0.5
-# # mean(abs.(x̂ .- meanscurve1)) #0.10833514946031565
-# # mean(abs.(physsol1 .- meanscurve1)) #0.10668470203219232
-
-# # # 9,0.5
-# # 10.158108285475553
-# # 10.207234384538026
-# # 10.215000657664852
-# # 10.213817644016174
-# # 13.380030074088719
-# # 13.348906350967326
-
-# # 6.952731422892041
-
-# # # All losses
-# # 10.161478523326277
-# # # L2 losses 1
-# # 9.33312996960278
-# # # L2 losses 2
-# # 10.217417241370631
-
-# # mean([fhsamples1[i][26] for i in 500:1000]) #6.245045767509431
-# # p #6.283185307179586
-# # # 9,4
-# # mean([fhsamples1[i][23] for i in 500:1000]) #6.212522300650451
-# # # 30,30
-# # mean([fhsamples1[i][23] for i in 500:1000]) #35.328636809737695
-# # # 30,0.5
-# # mean([fhsamples1[i][23] for i in 500:1000]) #35.232963812125654
-
-# # # ---------------------------------------normale
-# # # 9,0.5
-# # mean([fhsamples1[i][23] for i in 500:1000]) #6.547771572198114
-# # p #6.283185307179586
-# # # 9,4
-# # mean([fhsamples1[i][23] for i in 500:1000]) #6.158906185002702
-# # # 30,30
-# # mean([fhsamples1[i][23] for i in 500:1000]) #29.210400972620185
-# # # 30,0.5
-# # mean([fhsamples1[i][23] for i in 500:1000]) #29.153845019454522
-
-# # # ----------------more dataset normale -----------------------------
-# # # 9,0.5
-# # mean([fhsamples1[i][23] for i in 500:1000]) #6.271141178216537
-# # p #6.283185307179586
-# # # 9,4
-# # mean([fhsamples1[i][23] for i in 500:1000]) #6.241144692919369
-# # # 30,30
-# # mean([fhsamples1[i][23] for i in 500:1000]) #29.124480447973127
-# # # 30,0.5
-# # mean([fhsamples1[i][23] for i in 500:1000]) #29.07838011629903
-
-# # # 9,0.5
-# # mean(abs.(x̂ .- meanscurve1)) #0.016551602015599295
-# # mean(abs.(physsol1 .- meanscurve1)) #0.0021488618484224245
-# # # 9,4(little worse)
-# # mean(abs.(x̂ .- meanscurve1)) #0.017022725082640747
-# # mean(abs.(physsol1 .- meanscurve1)) #0.004339761917100232
-# # # 30,30
-# # mean(abs.(x̂ .- meanscurve1)) #0.09668785317864312
-# # mean(abs.(physsol1 .- meanscurve1)) #0.09430712337543362
-# # # 30,0.5
-# # mean(abs.(x̂ .- meanscurve1)) #0.09958118358974392
-# # mean(abs.(physsol1 .- meanscurve1)) #0.09717454226368502
-
-# # # ----------------more dataset special -----------------------------
-# # # 9,0.5
-# # mean([fhsamples1[i][23] for i in 500:1000]) #6.284355334485365
-# # p #6.283185307179586
-# # # 9,4
-# # mean([fhsamples1[i][23] for i in 500:1000]) #6.259238106698602
-# # # 30,30
-# # mean([fhsamples1[i][23] for i in 500:1000]) #29.139808934336987
-# # # 30,0.5
-# # mean([fhsamples1[i][23] for i in 500:1000]) #29.03921327641226
-
-# # # 9,0.5
-# # mean(abs.(x̂ .- meanscurve1)) #0.016627231605546876
-# # mean(abs.(physsol1 .- meanscurve1)) #0.0020311429130039564
-# # # 9,4(little worse)
-# # mean(abs.(x̂ .- meanscurve1)) #0.016650324577507352
-# # mean(abs.(physsol1 .- meanscurve1)) #0.0027537543411154677
-# # # 30,30
-# # mean(abs.(x̂ .- meanscurve1)) #0.09713187937270151
-# # mean(abs.(physsol1 .- meanscurve1)) #0.09317278450371556
-# # # 30,0.5
-# # mean(abs.(x̂ .- meanscurve1)) #0.09550234866855814
-# # mean(abs.(physsol1 .- meanscurve1)) #0.09317278450371556
-
-# # using Plots, StatsPlots
-# # plotly()
-
-# # ---------------------------------------------------------
-# # # # Distribution abstract in wrapper, dataset Float64
-# # # 268.651 s (206393690 allocations: 388.71 GiB)
-# # # 318.170551 seconds (206.29 M allocations: 388.453 GiB, 20.83% gc time)
-
-# # # # Above with dataset Real subtype
-# # # 326.201 s (206327409 allocations: 388.42 GiB)
-# # # 363.189370 seconds (206.25 M allocations: 387.975 GiB, 15.77% gc time)
-# # # 306.171 s (206321277 allocations: 388.55 GiB)
-# # # 356.180699 seconds (206.43 M allocations: 388.361 GiB, 13.77% gc time)
-
-# # # # Above with dataset AbstractFloat subtype
-# # # 290.751187 seconds (205.94 M allocations: 387.955 GiB, 12.92% gc time)
-# # # 296.319815 seconds (206.38 M allocations: 388.730 GiB, 12.69% gc time)
-
-# # # # ODEProblem float64 dtaset and vector distri inside
-# # #   273.169 s (206128318 allocations: 388.40 GiB)
-# # #   274.059531 seconds (205.91 M allocations: 387.953 GiB, 12.77% gc time)
-
-# # # #   Dataset float64 inside and vector distri outsude
-# # #   333.603 s (206251143 allocations: 388.41 GiB)
-# # # 373.377222 seconds (206.11 M allocations: 387.968 GiB, 13.25% gc time)
-# # #   359.745 s (206348301 allocations: 388.41 GiB)
-# # # 357.813114 seconds (206.31 M allocations: 388.354 GiB, 13.54% gc time)
-
-# # # # Dataset float64 inside and vector distri inside
-# # #   326.437 s (206253571 allocations: 388.41 GiB)
-# # #   290.334083 seconds (205.92 M allocations: 387.954 GiB, 13.82% gc time)
-
-# # # # current setting
-# # # 451.304 s (206476927 allocations: 388.43 GiB)
-# # # 384.532732 seconds (206.22 M allocations: 387.976 GiB, 13.17% gc time)
-# # # 310.223 s (206332558 allocations: 388.63 GiB)
-# # # 344.243889 seconds (206.34 M allocations: 388.409 GiB, 13.84% gc time)
-# # # 357.457737 seconds (206.66 M allocations: 389.064 GiB, 18.16% gc time)
-
-# # # # shit setup
-# # #   325.595 s (206283732 allocations: 388.41 GiB)
-# # # 334.248753 seconds (206.06 M allocations: 387.964 GiB, 12.60% gc time)
-# # #   326.011 s (206370857 allocations: 388.56 GiB)
-# # # 327.203339 seconds (206.29 M allocations: 388.405 GiB, 12.92% gc time)
-
-# # # # in wrapper Distribution prior, insiade FLOAT64 DATASET
-# # # 325.158167 seconds (205.97 M allocations: 387.958 GiB, 15.07% gc time) 
-# # #   429.536 s (206476324 allocations: 388.43 GiB)
-# # #   527.364 s (206740343 allocations: 388.58 GiB)
-
-# # # #   wrapper Distribtuion, inside Float64
-# # # 326.017 s (206037971 allocations: 387.96 GiB)
-# # # 347.424730 seconds (206.45 M allocations: 388.532 GiB, 12.92% gc time)
-
-# # # 439.047568 seconds (284.24 M allocations: 392.598 GiB, 15.25% gc time, 14.36% compilation time: 0% of which was recompilation)
-# # # 375.472142 seconds (206.40 M allocations: 388.529 GiB, 14.93% gc time)
-# # # 374.888820 seconds (206.34 M allocations: 388.346 GiB, 14.09% gc time)
-# # # 363.719611 seconds (206.39 M allocations: 388.581 GiB, 15.08% gc time)
-# # # # inside Distribtion, instide Float64
-# # #   310.238 s (206324249 allocations: 388.53 GiB)
-# # #   308.991494 seconds (206.34 M allocations: 388.549 GiB, 14.01% gc time)
-# # #   337.442 s (206280712 allocations: 388.36 GiB)
-# # #   299.983096 seconds (206.29 M allocations: 388.512 GiB, 17.14% gc time)
-
-# # #   394.924357 seconds (206.27 M allocations: 388.337 GiB, 23.68% gc time)
-# # # 438.204179 seconds (206.39 M allocations: 388.470 GiB, 23.84% gc time)
-# # #   376.626914 seconds (206.46 M allocations: 388.693 GiB, 18.72% gc time)
-# # # 286.863795 seconds (206.14 M allocations: 388.370 GiB, 18.80% gc time)
-# # #   285.556929 seconds (206.22 M allocations: 388.371 GiB, 17.04% gc time)
-# # #   291.471662 seconds (205.96 M allocations: 388.068 GiB, 19.85% gc time)
-
-# # # 495.814341 seconds (284.62 M allocations: 392.622 GiB, 12.56% gc time, 10.96% compilation time: 0% of which was recompilation)
-# # # 361.530617 seconds (206.36 M allocations: 388.526 GiB, 14.98% gc time)
-# # # 348.576065 seconds (206.22 M allocations: 388.337 GiB, 15.01% gc time)
-# # # 374.575609 seconds (206.45 M allocations: 388.586 GiB, 14.65% gc time)
-# # # 314.223008 seconds (206.23 M allocations: 388.411 GiB, 14.63% gc time)
-
-# # PROBLEM-3 LOTKA VOLTERRA EXAMPLE [WIP] (WITH PARAMETER ESTIMATION)(will be put in tutorial page)
-# function lotka_volterra(u, p, t)
-#     # Model parameters.
-#     α, β, γ, δ = p
-#     # Current state.
-#     x, y = u
-
-#     # Evaluate differential equations.
-#     dx = (α - β * y) * x # prey
-#     dy = (δ * x - γ) * y # predator
-
-#     return [dx, dy]
-# end
-
-# u0 = [1.0, 1.0]
-# p = [1.5, 1.0, 3.0, 1.0]
-# tspan = (0.0, 6.0)
-# prob = ODEProblem(lotka_volterra, u0, tspan, p)
-# solution = solve(prob, Tsit5(); saveat = 0.05)
-
-# as = reduce(hcat, solution.u)
-# as[1, :]
-# # Plot simulation.
-# time = solution.t
-# u = hcat(solution.u...)
-# # BPINN AND TRAINING DATASET CREATION, NN create, Reconstruct
-# x = u[1, :] + 0.5 * randn(length(u[1, :]))
-# y = u[2, :] + 0.5 * randn(length(u[1, :]))
-# dataset = [x[1:50], y[1:50], time[1:50]]
-# # scatter!(time, [x, y])
-# # scatter!(dataset[3], [dataset[2], dataset[1]])
-
-# # NN has 2 outputs as u -> [dx,dy]
-# chainlux1 = Lux.Chain(Lux.Dense(1, 6, Lux.tanh), Lux.Dense(6, 6, Lux.tanh),
-#     Lux.Dense(6, 2))
-# chainflux1 = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 6, tanh), Flux.Dense(6, 2))
-
-# # fh_mcmc_chainflux1, fhsamplesflux1, fhstatsflux1 = ahmc_bayesian_pinn_ode(prob, chainflux1,
-# #                                                                           dataset = dataset,
-# #                                                                           draw_samples = 1000,
-# #                                                                           l2std = [
-# #                                                                               0.05,
-# #                                                                               0.05,
-# #                                                                           ],
-# #                                                                           phystd = [
-# #                                                                               0.05,
-# #                                                                               0.05,
-# #                                                                           ],
-# #                                                                           priorsNNw = (0.0,
-# #          
-
-# #   3.0))
-
-# # check if NN output is more than 1
-# # numoutput = size(luxar[1])[1]
-# # if numoutput > 1
-# #     # Initialize a vector to store the separated outputs for each output dimension
-# #     output_matrices = [Vector{Vector{Float32}}() for _ in 1:numoutput]
-
-# #     # Loop through each element in the `as` vector
-# #     for element in as
-# #         for i in 1:numoutput
-# #             push!(output_matrices[i], element[i, :])  # Append the i-th output (i-th row) to the i-th output_matrices
-# #         end
-# #     end
-
-# #     ensemblecurves = Vector{}[]
-# #     for r in 1:numoutput
-# #         br = hcat(output_matrices[r]...)'
-# #         ensemblecurve = prob.u0[r] .+
-# #                         [Particles(br[:, i]) for i in 1:length(t)] .*
-# #                         (t .- prob.tspan[1])
-# #         push!(ensemblecurves, ensemblecurve)
-# #     end
-
-# # else
-# #     # ensemblecurve = prob.u0 .+
-# #     #                 [Particles(reduce(vcat, luxar)[:, i]) for i in 1:length(t)] .*
-# #     #                 (t .- prob.tspan[1])
-# #     print("yuh")
-# # end
-
-# # fhsamplesflux2
-# # nnparams = length(init1)
-# # estimnnparams = [Particles(reduce(hcat, fhsamplesflux2)[i, :]) for i in 1:nnparams]
-# # ninv=4
-# # estimated_params = [Particles(reduce(hcat, fhsamplesflux2[(end - ninv + 1):end])[i, :])
-# #                     for i in (nnparams + 1):(nnparams + ninv)]
-# # output_matrices[r]
-# # br = hcat(output_matrices[r]...)'
-
-# # br[:, 1]
-
-# # [Particles(br[:, i]) for i in 1:length(t)]
-# # prob.u0
-# # [Particles(br[:, i]) for i in 1:length(t)] .*
-# # (t .- prob.tspan[1])
-
-# # ensemblecurve = prob.u0[r] .+
-# #                 [Particles(br[:, i]) for i in 1:length(t)] .*
-# #                 (t .- prob.tspan[1])
-# # push!(ensemblecurves, ensemblecurve)
-
-# using StatsPlots
-# plotly()
-# plot(t, ensemblecurve)
-# plot(t, ensemblecurves[1])
-# plot!(t, ensemblecurves[2])
-# ensemblecurve
-# ensemblecurves[1]
-# fh_mcmc_chainflux2, fhsamplesflux2, fhstatsflux2 = ahmc_bayesian_pinn_ode(prob, chainflux1,
-#     dataset = dataset,
-#     draw_samples = 1000,
-#     l2std = [
-#         0.05,
-#         0.05,
-#     ],
-#     phystd = [
-#         0.05,
-#         0.05,
-#     ],
-#     priorsNNw = (0.0,
-#         3.0),
-#     param = [
-#         Normal(1.5,
-#             0.5),
-#         Normal(1.2,
-#             0.5),
-#         Normal(3.3,
-#             0.5),
-#         Normal(1.4,
-#             0.5),
-#     ], progress = true)
-
-# alg = NeuralPDE.BNNODE(chainflux1,
-#     dataset = dataset,
-#     draw_samples = 1000,
-#     l2std = [
-#         0.05,
-#         0.05,
-#     ],
-#     phystd = [
-#         0.05,
-#         0.05,
-#     ],
-#     priorsNNw = (0.0,
-#         3.0),
-#     param = [
-#         Normal(4.5,
-#             5),
-#         Normal(7,
-#             2),
-#         Normal(5,
-#             2),
-#         Normal(-4,
-#             6),
-#     ],
-#     n_leapfrog = 30, progress = true)
-
-# sol3flux_pestim = solve(prob, alg)
-
-# # OG PARAM VALUES
-# [1.5, 1.0, 3.0, 1.0]
-# # less
-# # [1.34, 7.51, 2.54, -2.55]
-# # better
-# # [1.48, 0.993, 2.77, 0.954]
-
-# sol3flux_pestim.es
-# sol3flux_pestim.estimated_ode_params
-# # fh_mcmc_chainlux1, fhsampleslux1, fhstatslux1 = ahmc_bayesian_pinn_ode(prob, chainlux1,
-# #                                                                        dataset = dataset,
-# #                                                                        draw_samples = 1000,
-# #                                                                        l2std = [0.05, 0.05],
-# #                                                                        phystd = [
-# #                                                                            0.05,
-# #                                                                            0.05,
-# #                                                                        ],
-# #                                                                        priorsNNw = (0.0,
-# #                                                                                     3.0))
-
-# # fh_mcmc_chainlux2, fhsampleslux2, fhstatslux2 = ahmc_bayesian_pinn_ode(prob, chainlux1,
-# #                                                                        dataset = dataset,
-# #                                                                        draw_samples = 1000,
-# #                                                                        l2std = [0.05, 0.05],
-# #                                                                        phystd = [
-# #                                                                            0.05,
-# #                                                                            0.05,
-# #                                                                        ],
-# #                                                                        priorsNNw = (0.0,
-# #                                                                                     3.0),
-# #                                                                        param = [
-# #                                                                            Normal(1.5, 0.5),
-# #                                                                            Normal(1.2, 0.5),
-# #                                                                            Normal(3.3, 0.5),
-# #                                                                            Normal(1.4, 0.5),
-# #                                                                        ])
-
-# init1, re1 = destructure(chainflux1)
-# θinit, st = Lux.setup(Random.default_rng(), chainlux1)
-# #   PLOT testing points
-# t = time
-# p = prob.p
-# collect(Float64, vcat(ComponentArrays.ComponentArray(θinit)))
-# collect(Float64, ComponentArrays.ComponentArray(θinit))
-# # Mean of last 1000 sampled parameter's curves(flux and lux chains)[Ensemble predictions]
-# out = re1.([fhsamplesflux1[i][1:68] for i in 500:1000])
-# yu = [out[i](t') for i in eachindex(out)]
-
-# function getensemble(yu, num_models)
-#     num_rows, num_cols = size(yu[1])
-#     row_means = zeros(Float32, num_rows, num_cols)
-#     for i in 1:num_models
-#         row_means .+= yu[i]
-#     end
-#     row_means ./ num_models
-# end
-# fluxmean = getensemble(yu, length(out))
-# meanscurve1_1 = prob.u0 .+ (t' .- prob.tspan[1]) .* fluxmean
-# mean(abs.(u .- meanscurve1_1))
-
-# plot!(t, physsol1)
-# @test mean(abs2.(x̂ .- meanscurve1_1)) < 2e-2
-# @test mean(abs2.(physsol1 .- meanscurve1)) < 2e-2
-# @test mean(abs2.(x̂ .- meanscurve2)) < 3e-3
-# @test mean(abs2.(physsol1 .- meanscurve2)) < 2e-3
-
-# out = re1.([fhsamplesflux2[i][1:68] for i in 500:1000])
-# yu = collect(out[i](t') for i in eachindex(out))
-# fluxmean = getensemble(yu, length(out))
-# meanscurve1_2 = prob.u0 .+ (t' .- prob.tspan[1]) .* fluxmean
-# mean(abs.(u .- meanscurve1_2))
-
-# @test mean(abs2.(x̂ .- meanscurve1)) < 2e-2
-# @test mean(abs2.(physsol1 .- meanscurve1)) < 2e-2
-# @test mean(abs2.(x̂ .- meanscurve2)) < 3e-3
-# @test mean(abs2.(physsol1 .- meanscurve2)) < 2e-3
-
-# θ = [vector_to_parameters(fhsampleslux1[i][1:(end - 4)], θinit) for i in 500:1000]
-# luxar = [chainlux1(t', θ[i], st)[1] for i in 1:500]
-# luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
-# meanscurve2_1 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
-
-# @test mean(abs2.(x̂ .- meanscurve1)) < 2e-2
-# @test mean(abs2.(physsol1 .- meanscurve1)) < 2e-2
-# @test mean(abs2.(x̂ .- meanscurve2)) < 3e-3
-# @test mean(abs2.(physsol1 .- meanscurve2)) < 2e-3
-
-# θ = [vector_to_parameters(fhsampleslux2[i][1:(end - 4)], θinit) for i in 500:1000]
-# luxar = [chainlux1(t', θ[i], st)[1] for i in 1:500]
-# luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
-# meanscurve2_2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
-
-# @test mean(abs2.(x̂ .- meanscurve1)) < 2e-2
-# @test mean(abs2.(physsol1 .- meanscurve1)) < 2e-2
-# @test mean(abs2.(x̂ .- meanscurve2)) < 3e-3
-# @test mean(abs2.(physsol1 .- meanscurve2)) < 2e-3
-
-# # # ESTIMATED ODE PARAMETERS (NN1 AND NN2)
-# @test abs(p - mean([fhsamplesflux2[i][69] for i in 500:1000])) < 0.1 * p[1]
-# @test abs(p - mean([fhsampleslux2[i][69] for i in 500:1000])) < 0.2 * p[1]
-
-# # @test abs(p - mean([fhsamplesflux2[i][70] for i in 500:1000])) < 0.1 * p[2]
-# # @test abs(p - mean([fhsampleslux2[i][70] for i in 500:1000])) < 0.2 * p[2]
-
-# # @test abs(p - mean([fhsamplesflux2[i][71] for i in 500:1000])) < 0.1 * p[3]
-# # @test abs(p - mean([fhsampleslux2[i][71] for i in 500:1000])) < 0.2 * p[3]
-
-# # @test abs(p - mean([fhsamplesflux2[i][72] for i in 500:1000])) < 0.1 * p[4]
-# # @test abs(p - mean([fhsampleslux2[i][72] for i in 500:1000])) < 0.2 * p[4]
-
-# # fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chainlux1,
-# #                                                               dataset = dataset,
-# #                                                               draw_samples = 1000,
-# #                                                               l2std = [0.05, 0.05],
-# #                                                               phystd = [0.05, 0.05],
-# #                                                               priorsNNw = (0.0, 3.0),
-# #                                                               param = [
-# #                                                                   Normal(1.5, 0.5),
-# #                                                                   Normal(1.2, 0.5),
-# #                                                                   Normal(3.3, 0.5),
-# #                                                                   Normal(1.4, 0.5),
-# #                                                               ], autodiff = true)
-
-# # fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chainlux1,
-# #                                                               dataset = dataset,
-# #                                                               draw_samples = 1000,
-# #                                                               l2std = [0.05, 0.05],
-# #                                                               phystd = [0.05, 0.05],
-# #                                                               priorsNNw = (0.0, 3.0),
-# #                                                               param = [
-# #                                                                   Normal(1.5, 0.5),
-# #                                                                   Normal(1.2, 0.5),
-# #                                                                   Normal(3.3, 0.5),
-# #                                                                   Normal(1.4, 0.5),
-# #                                                               ], nchains = 2)
-
-# # NOTES (WILL CLEAR LATER)
-# # --------------------------------------------------------------------------------------------
-# # Hamiltonian energy must be lowest(more paramters the better is it to map onto them)
-# # full better than L2 and phy individual(test)
-# # in mergephys more points after training points is better from 20->40
-# # does consecutive runs bceome better? why?(plot 172)(same chain maybe)
-# # does density of points in timespan matter dataset vs internal timespan?(plot 172)(100+0.01)
-# # when training from 0-1 and phys from 1-5 with 1/150 simple nn slow,but bigger nn faster decrease in Hmailtonian
-# # bigger time interval more curves to adapt to only more parameters adapt to that, better NN architecture
-# # higher order logproblems solve better
-# # repl up up are same instances? but reexecute calls are new?
-
-# #Compare results against paper example
-# # Lux chains support (DONE)
-# # fix predictions for odes depending upon 1,p in f(u,p,t)(DONE)
-# # lotka volterra learn curve beyond l2 losses(L2 losses determine accuracy of parameters)(parameters cant run free ∴ L2 interval only)
-# # check if prameters estimation works(YES)
-# # lotka volterra parameters estimate (DONE)
-
-# using NeuralPDE, Lux, Flux, Optimization, OptimizationOptimJL
-# import ModelingToolkit: Interval
-# using Plots, StatsPlots
-# plotly()
-# # Profile.init()
-
-# @parameters x y
-# @variables u(..)
-# Dxx = Differential(x)^2
-# Dyy = Differential(y)^2
-
-# # 2D PDE
-# eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y)
-
-# # Boundary conditions
-# bcs = [u(0, y) ~ 0.0, u(1, y) ~ 0.0,
-#     u(x, 0) ~ 0.0, u(x, 1) ~ 0.0]
-# # Space and time domains
-# domains = [x ∈ Interval(0.0, 1.0),
-#     y ∈ Interval(0.0, 1.0)]
-
-# # Neural network
-# dim = 2 # number of dimensions
-# chain = Flux.Chain(Flux.Dense(dim, 16, Lux.σ), Flux.Dense(16, 16, Lux.σ), Flux.Dense(16, 1))
-# θ, re = destructure(chain)
-# # Discretization
-# dx = 0.05
-# discretization = PhysicsInformedNN(chain, GridTraining(dx))
-
-# @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
-
-# pinnrep = symbolic_discretize(pde_system, discretization)
-# typeof(pinnrep.phi)
-# typeof(pinnrep.phi)
-# typeof(re)
-# pinnrep.phi([1, 2], θ)
-
-# typeof(θ)
-
-# print(pinnrep)
-# pinnrep.eqs
-# pinnrep.bcs
-# pinnrep.domains
-# pinnrep.eq_params
-# pinnrep.defaults
-# print(pinnrep.default_p)
-# pinnrep.param_estim
-# print(pinnrep.additional_loss)
-# pinnrep.adaloss
-# pinnrep.depvars
-# pinnrep.indvars
-# pinnrep.dict_depvar_input
-# pinnrep.dict_depvars
-# pinnrep.dict_indvars
-# print(pinnrep.logger)
-# pinnrep.multioutput
-# pinnrep.iteration
-# pinnrep.init_params
-# pinnrep.flat_init_params
-# pinnrep.phi
-# pinnrep.derivative
-# pinnrep.strategy
-# pinnrep.pde_indvars
-# pinnrep.bc_indvars
-# pinnrep.pde_integration_vars
-# pinnrep.bc_integration_vars
-# pinnrep.integral
-# pinnrep.symbolic_pde_loss_functions
-# pinnrep.symbolic_bc_loss_functions
-# pinnrep.loss_functions
-
-# #  = discretize(pde_system, discretization)
-# prob = symbolic_discretize(pde_system, discretization)
-# # "The boundary condition loss functions"
-# sum([prob.loss_functions.bc_loss_functions[i](θ) for i in eachindex(1:4)])
-# sum([prob.loss_functions.pde_loss_functions[i](θ) for i in eachindex(1)])
-
-# prob.loss_functions.full_loss_function(θ, 32)
-
-# prob.loss_functions.bc_loss_functions[1](θ)
-
-# prob.loss_functions.bc_loss_functions
-# prob.loss_functions.full_loss_function
-# prob.loss_functions.additional_loss_function
-# prob.loss_functions.pde_loss_functions
-
-# 1.3953060473003345 + 1.378102161087438 + 1.395376727128639 + 1.3783868705075002 +
-# 0.22674532775196876
-# # "The PDE loss functions"
-# prob.loss_functions.pde_loss_functions
-# prob.loss_functions.pde_loss_functions[1](θ)
-# # "The full loss function, combining the PDE and boundary condition loss functions.This is the loss function that is used by the optimizer."
-# prob.loss_functions.full_loss_function(θ, nothing)
-# prob.loss_functions.full_loss_function(θ, 423423)
-
-# # "The wrapped `additional_loss`, as pieced together for the optimizer."
-# prob.loss_functions.additional_loss_function
-# # "The pre-data version of the PDE loss function"
-# prob.loss_functions.datafree_pde_loss_functions
-# # "The pre-data version of the BC loss function"
-# prob.loss_functions.datafree_bc_loss_functions
-
-# using Random
-# θ, st = Lux.setup(Random.default_rng(), chain)
-# #Optimizer
-# opt = OptimizationOptimJL.BFGS()
-
-# #Callback function
-# callback = function (p, l)
-#     println("Current loss is: $l")
-#     return false
-# end
-
-# res = Optimization.solve(prob, opt, callback = callback, maxiters = 1000)
-# phi = discretization.phi
-
-# # ------------------------------------------------
-# using NeuralPDE, Lux, ModelingToolkit, Optimization, OptimizationOptimJL, OrdinaryDiffEq,
-#       Plots
-# import ModelingToolkit: Interval, infimum, supremum
-# @parameters t, σ_, β, ρ
-# @variables x(..), y(..), z(..)
-# Dt = Differential(t)
-# eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
-#     Dt(y(t)) ~ x(t) * (ρ - z(t)) - y(t),
-#     Dt(z(t)) ~ x(t) * y(t) - β * z(t)]
-
-# bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
-# domains = [t ∈ Interval(0.0, 1.0)]
-# dt = 0.01
-
-# input_ = length(domains)
-# n = 8
-# chain1 = Lux.Chain(Lux.Dense(input_, n, Lux.σ), Lux.Dense(n, n, Lux.σ),
-#                    Lux.Dense(n, n, Lux.σ),
-#                    Lux.Dense(n, 1))
-# chain2 = Lux.Chain(Lux.Dense(input_, n, Lux.σ), Lux.Dense(n, n, Lux.σ),
-#                    Lux.Dense(n, n, Lux.σ),
-#                    Lux.Dense(n, 1))
-# chain3 = Lux.Chain(Lux.Dense(input_, n, Lux.σ), Lux.Dense(n, n, Lux.σ),
-#                    Lux.Dense(n, n, Lux.σ),
-#                    Lux.Dense(n, 1))
-
-# function lorenz!(du, u, p, t)
-#     du[1] = 10.0 * (u[2] - u[1])
-#     du[2] = u[1] * (28.0 - u[3]) - u[2]
-#     du[3] = u[1] * u[2] - (8 / 3) * u[3]
-# end
-
-# u0 = [1.0; 0.0; 0.0]
-# tspan = (0.0, 1.0)
-# prob = ODEProblem(lorenz!, u0, tspan)
-# sol = solve(prob, Tsit5(), dt = 0.1)
-# ts = [infimum(d.domain):dt:supremum(d.domain) for d in domains][1]
-# function getData(sol)
-#     data = []
-#     us = hcat(sol(ts).u...)
-#     ts_ = hcat(sol(ts).t...)
-#     return [us, ts_]
-# end
-# data = getData(sol)
-
-# (u_, t_) = data
-# len = length(data[2])
-
-# depvars = [:x, :y, :z]
-# function additional_loss(phi, θ, p)
-#     return sum(sum(abs2, phi[i](t_, θ[depvars[i]]) .- u_[[i], :]) / len for i in 1:1:3)
-# end
-
-# discretization = NeuralPDE.PhysicsInformedNN([chain1, chain2, chain3],
-#                                              NeuralPDE.GridTraining(dt),
-#                                              param_estim = false,
-#                                              additional_loss = additional_loss)
-# @named pde_system = PDESystem(eqs, bcs, domains, [t], [x(t), y(t), z(t)], [σ_, ρ, β],
-#                               defaults = Dict([p .=> 1.0 for p in [σ_, ρ, β]]))
-# prob = NeuralPDE.discretize(pde_system, discretization)
-# callback = function (p, l)
-#     println("Current loss is: $l")
-#     return false
-# end
-# res = Optimization.solve(prob, BFGS(); callback = callback, maxiters = 5000)
-# p_ = res.u[(end - 2):end] # p_ = [9.93, 28.002, 2.667]
-
-# minimizers = [res.u.depvar[depvars[i]] for i in 1:3]
-# ts = [infimum(d.domain):(dt / 10):supremum(d.domain) for d in domains][1]
-# u_predict = [[discretization.phi[i]([t], minimizers[i])[1] for t in ts] for i in 1:3]
-# plot(sol)
-# plot!(ts, u_predict, label = ["x(t)" "y(t)" "z(t)"])
-
-# discretization.multioutput
-# discretization.chain
-# discretization.strategy
-# discretization.init_params
-# discretization.phi
-# discretization.derivative
-# discretization.param_estim
-# discretization.additional_loss
-# discretization.adaptive_loss
-# discretization.logger
-# discretization.log_options
-# discretization.iteration
-# discretization.self_increment
-# discretization.multioutput
-# discretization.kwargs
-
-# struct BNNODE1{P <: Vector{<:Distribution}}
-#     chain::Any
-#     Kernel::Any
-#     draw_samples::UInt32
-#     priorsNNw::Tuple{Float64, Float64}
-#     param::P
-#     l2std::Vector{Float64}
-#     phystd::Vector{Float64}
-
-#     function BNNODE1(chain, Kernel; draw_samples = 2000, priorsNNw = (0.0, 3.0), param = [],
-#                      l2std = [0.05], phystd = [0.05])
-#         BNNODE1(chain, Kernel, draw_samples, priorsNNw, param, l2std, phystd)
-#     end
-# end
-
-# struct BNNODE3{C, K, P <: Union{Any, Vector{<:Distribution}}}
-#     chain::C
-#     Kernel::K
-#     draw_samples::UInt32
-#     priorsNNw::Tuple{Float64, Float64}
-#     param::P
-#     l2std::Vector{Float64}
-#     phystd::Vector{Float64}
-
-#     function BNNODE3(chain, Kernel; draw_samples = 2000, priorsNNw = (0.0, 3.0), param = [],
-#                      l2std = [0.05], phystd = [0.05])
-#         new{typeof(chain), typeof(Kernel), typeof(param)}(chain, Kernel, draw_samples,
-#                                                           priorsNNw, param, l2std, phystd)
-#     end
-# end
-# linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π)
-# linear = (u, p, t) -> cos(2 * π * t)
-# tspan = (0.0, 2.0)
-# u0 = 0.0
-# prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan)
-
-# ta = range(tspan[1], tspan[2], length = 300)
-# u = [linear_analytic(u0, nothing, ti) for ti in ta]
-# sol1 = solve(prob, Tsit5())
-
-# # BPINN AND TRAINING DATASET CREATION, NN create, Reconstruct
-# x̂ = collect(Float64, Array(u) + 0.02 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂[1:100], time[1:100]]
-
-# # Call BPINN, create chain
-# chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1))
-# chainlux = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1))
-# HMC
-# solve(prob, BNNODE(chainflux, HMC))
-# BNNODE1(chainflux, HMC, 2000)
-
-# draw_samples = 2000
-# priorsNNw = (0.0, 3.0)
-# param = []
-# l2std = [0.05]
-# phystd = [0.05]
-# @time BNNODE3(chainflux, HMC, draw_samples = 2000, priorsNNw = (0.0, 3.0),
-#               param = [nothing],
-#               l2std = [0.05], phystd = [0.05])
-# typeof(Nothing) <: Vector{<:Distribution}
-# Nothing <: Distribution
-# {UnionAll} <: Distribution
-# @time [Nothing]
-# typeof([Nothing])
-# @time [1]
-
-# function test1(sum; c = 23, d = 32)
-#     return sum + c + d
-# end
-# function test(a, b; c, d)
-#     return test1(a + b, c, d)
-# end
-
-# test(2, 2)
-
-# struct BNNODE3{C, K, P <: Union{Vector{Nothing}, Vector{<:Distribution}}}
-#     chain::C
-#     Kernel::K
-#     draw_samples::Int64
-#     priorsNNw::Tuple{Float64, Float64}
-#     param::P
-#     l2std::Vector{Float64}
-#     phystd::Vector{Float64}
-
-#     function BNNODE3(chain, Kernel; draw_samples,
-#                      priorsNNw, param = [nothing], l2std, phystd)
-#         new{typeof(chain), typeof(Kernel), typeof(param)}(chain,
-#                                                           Kernel,
-#                                                           draw_samples,
-#                                                           priorsNNw,
-#                                                           param, l2std,
-#                                                           phystd)
-#     end
-# end
-
-# function solve1(prob::DiffEqBase.AbstractODEProblem, alg::BNNODE3;
-#                 dataset = [nothing], dt = 1 / 20.0,
-#                 init_params = nothing, nchains = 1,
-#                 autodiff = false, Integrator = Leapfrog,
-#                 Adaptor = StanHMCAdaptor, targetacceptancerate = 0.8,
-#                 Metric = DiagEuclideanMetric, jitter_rate = 3.0,
-#                 tempering_rate = 3.0, max_depth = 10, Δ_max = 1000,
-#                 n_leapfrog = 10, δ = 0.65, λ = 0.3, progress = true,
-#                 verbose = false)
-#     chain = alg.chain
-#     l2std = alg.l2std
-#     phystd = alg.phystd
-#     priorsNNw = alg.priorsNNw
-#     Kernel = alg.Kernel
-#     draw_samples = alg.draw_samples
-
-#     param = alg.param == [nothing] ? [] : alg.param
-#     mcmcchain, samples, statistics = ahmc_bayesian_pinn_ode(prob, chain, dataset = dataset,
-#                                                             draw_samples = draw_samples,
-#                                                             init_params = init_params,
-#                                                             physdt = dt, l2std = l2std,
-#                                                             phystd = phystd,
-#                                                             priorsNNw = priorsNNw,
-#                                                             param = param,
-#                                                             nchains = nchains,
-#                                                             autodiff = autodiff,
-#                                                             Kernel = Kernel,
-#                                                             Integrator = Integrator,
-#                                                             Adaptor = Adaptor,
-#                                                             targetacceptancerate = targetacceptancerate,
-#                                                             Metric = Metric,
-#                                                             jitter_rate = jitter_rate,
-#                                                             tempering_rate = tempering_rate,
-#                                                             max_depth = max_depth,
-#                                                             Δ_max = Δ_max,
-#                                                             n_leapfrog = n_leapfrog, δ = δ,
-#                                                             λ = λ, progress = progress,
-#                                                             verbose = verbose)
-# end
-
-# linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π)
-# linear = (u, p, t) -> cos(2 * π * t)
-# tspan = (0.0, 2.0)
-# u0 = 0.0
-# prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan)
-
-# ta = range(tspan[1], tspan[2], length = 300)
-# u = [linear_analytic(u0, nothing, ti) for ti in ta]
-# # sol1 = solve(prob, Tsit5())
-
-# # BPINN AND TRAINING DATASET CREATION, NN create, Reconstruct
-# x̂ = collect(Float64, Array(u) + 0.02 * randn(size(u)))
-# time = vec(collect(Float64, ta))
-# dataset = [x̂[1:100], time[1:100]]
-
-# # Call BPINN, create chain
-# chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1))
-# chainlux = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1))
-# HMC
-
-# solve1(prob, a)
-# a = BNNODE3(chainflux, HMC, draw_samples = 2000,
-#             priorsNNw = (0.0, 3.0),
-#             l2std = [0.05], phystd = [0.05])
-
-# Define Lotka-Volterra model.
-function lotka_volterra1(u, p, t)
-    # Model parameters.
-    α, β, γ, δ = p
-    # Current state.
-    x, y = u
-
-    # Evaluate differential equations.
-    dx = (α - β * y) * x # prey
-    dy = (δ * x - γ) * y # predator
-
-    return [dx, dy]
-end
-
-u0 = [1.0, 1.0]
-p = [1.5, 1.0, 3.0, 1.0]
-tspan = (0.0, 6.0)
-prob = ODEProblem(lotka_volterra1, u0, tspan, p)
-solution = solve(prob, Tsit5(); saveat = 0.05)
-
-as = reduce(hcat, solution.u)
-as[1, :]
-# Plot simulation.
-time = solution.t
-u = hcat(solution.u...)
-# BPINN AND TRAINING DATASET CREATION, NN create, Reconstruct
-x = u[1, :] + 0.5 * randn(length(u[1, :]))
-y = u[2, :] + 0.5 * randn(length(u[1, :]))
-dataset = [x[1:50], y[1:50], time[1:50]]
-# scatter!(time, [x, y])
-# scatter!(dataset[3], [dataset[2], dataset[1]])
-
-# NN has 2 outputs as u -> [dx,dy]
-chainlux1 = Lux.Chain(Lux.Dense(1, 6, Lux.tanh), Lux.Dense(6, 6, Lux.tanh),
-    Lux.Dense(6, 2))
-chainflux1 = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 6, tanh), Flux.Dense(6, 2))
-
-fh_mcmc_chainflux1, fhsamplesflux1, fhstatsflux1 = ahmc_bayesian_pinn_ode(prob, chainflux1,
-    dataset = dataset,
-    draw_samples = 1000,
-    l2std = [
-        0.05,
-        0.05,
-    ],
-    phystd = [
-        0.05,
-        0.05,
-    ],
-    priorsNNw = (0.0, 3.0), progress = true)
-ahmc_bayesian_pinn_ode(prob, chainflux1,
-    dataset = dataset,
-    draw_samples = 1000,
-    l2std = [
-        0.05,
-        0.05,
-    ],
-    phystd = [
-        0.05,
-        0.05,
-    ],
-    priorsNNw = (0.0, 3.0), progress = true)
-
-#     2×171 Matrix{Float64}:
-#  -0.5  -0.518956  -0.529639  …  -1.00266  -1.01049
-#   2.0   1.97109    1.92747       0.42619   0.396335
-
-#     2-element Vector{Float64}:
-#  -119451.94949911036
-#  -128543.23714618056
-
-# alg = NeuralPDE.BNNODE(chainflux1,
-#     dataset = dataset,
-#     draw_samples = 1000,
-#     l2std = [
-#         0.05,
-#         0.05,
-#     ],
-#     phystd = [
-#         0.05,
-#         0.05,
-#     ],
-#     priorsNNw = (0.0,
-#         3.0),
-#     param = [
-#         Normal(4.5,
-#             5),
-#         Normal(7,
-#             2),
-#         Normal(5,
-#             2),
-#         Normal(-4,
-#             6),
-#     ],
-#     n_leapfrog = 30, progress = true)
-
-# sol3flux_pestim = solve(prob, alg)
-
-#  ----------------------------------------------
-# original paper implementation
-# 25 points 
-run1  #7.70593 Particles{Float64, 1}
-run2 #6.66347 Particles{Float64, 1} 
-run3 #6.84827 Particles{Float64, 1} 
-
-# 50 points 
-run1 #7.83577 Particles{Float64, 1}
-run2 #6.49477 Particles{Float64, 1}
-run3 #6.47421 Particles{Float64, 1}
-
-# 100 points 
-run1 #5.96604 Particles{Float64, 1}
-run2 #6.05432 Particles{Float64, 1}
-run3 #6.08856 Particles{Float64, 1}
-
-# Full likelihood(uses total variation regularized differentiation) 
-# 25 points 
-run1 #6.41722 Particles{Float64, 1}
-run2 #6.42782 Particles{Float64, 1}
-run3 #6.42782 Particles{Float64, 1}
-
-# 50 points
-run1 #5.71268 Particles{Float64, 1}
-run2 #5.74599 Particles{Float64, 1}
-run3 #5.74599 Particles{Float64, 1}
-
-# 100 points  
-run1 #6.59097 Particles{Float64, 1}
-run2 #6.62813 Particles{Float64, 1}
-run3 #6.62813 Particles{Float64, 1}
-
-using Plots, StatsPlots
-function lotka_volterra(u, p, t)
-    # Model parameters.
-    α, β, γ, δ = p
-    # Current state.
-    x, y = u
-
-    # Evaluate differential equations.
-    dx = (α - β * y) * x # prey
-    dy = (δ * x - γ) * y # predator
-
-    return [dx, dy]
-end
-
-# initial-value problem.
-u0 = [1.0, 1.0]
-p = [1.5, 1.0, 3.0, 1.0]
-tspan = (0.0, 6.0)
-prob = ODEProblem(lotka_volterra, u0, tspan, p)
-
-# Plot simulation.
-
-solution = solve(prob, Tsit5(); saveat = 0.05)
-plot(solve(prob, Tsit5()))
-
-# Dataset creation for parameter estimation
-time = solution.t
-u = hcat(solution.u...)
-x = u[1, :] + 0.5 * randn(length(u[1, :]))
-y = u[2, :] + 0.5 * randn(length(u[1, :]))
-dataset = [x, y, time]
-
-# Neural Networks must have 2 outputs as u -> [dx,dy] in function lotka_volterra()
-chainflux = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 6, tanh), Flux.Dense(6, 2)) |>
-            Flux.f64
-
-chainlux = Lux.Chain(Lux.Dense(1, 6, Lux.tanh), Lux.Dense(6, 6, Lux.tanh), Lux.Dense(6, 2))
-
-alg1 = NeuralPDE.BNNODE(chainflux,
-    dataset = dataset,
-    draw_samples = 1000,
-    l2std = [
-        0.01,
-        0.01,
-    ],
-    phystd = [
-        0.01,
-        0.01,
-    ],
-    priorsNNw = (0.0,
-        3.0),
-    param = [
-        LogNormal(1.5,
-            0.5),
-        LogNormal(1.2,
-            0.5),
-        LogNormal(3.3,
-            1),
-        LogNormal(1.4,
-            1)],
-    n_leapfrog = 30, progress = true)
-
-sol_flux_pestim = solve(prob, alg1)
-
-# Dataset not needed as we are solving the equation with ideal parameters
-alg2 = NeuralPDE.BNNODE(chainlux,
-    draw_samples = 1000,
-    l2std = [
-        0.05,
-        0.05,
-    ],
-    phystd = [
-        0.05,
-        0.05,
-    ],
-    priorsNNw = (0.0,
-        3.0),
-    n_leapfrog = 30, progress = true)
-
-sol_lux = solve(prob, alg2)
-
-#testing timepoints must match keyword arg `saveat`` timepoints of solve() call
-t = collect(Float64, prob.tspan[1]:(1 / 50.0):prob.tspan[2])
-
-# plotting solution for x,y for chain_flux
-plot(t, sol_flux_pestim.ensemblesol[1])
-plot!(t, sol_flux_pestim.ensemblesol[2])
-
-plot(sol_flux_pestim.ens1mblesol[1])
-plot!(sol_flux_pestim.ensemblesol[2])
-
-# estimated ODE parameters by .estimated_ode_params, weights and biases by .estimated_nn_params
-sol_flux_pestim.estimated_nn_params
-sol_flux_pestim.estimated_ode_params
-
-# plotting solution for x,y for chain_lux
-plot(t, sol_lux.ensemblesol[1])
-plot!(t, sol_lux.ensemblesol[2])
-
-# estimated weights and biases by .estimated_nn_params for chain_lux
-sol_lux.estimated_nn_params
-
-# # ----------------------------------stats-----------------------------
-# #   ----------------------------
-# # -----------------------------
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2loss2 Logpdf is : -3118.0639515039957
-# Sampling 100%|███████████████████████████████| Time: 0:04:47 
-
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2loss2 Logpdf is : -3118.0639515039957
-# Sampling 100%|███████████████████████████████| Time: 0:03:38 
-
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2loss2 Logpdf is : -3118.0639515039957
-# Sampling 100%|███████████████████████████████| Time: 0:04:12 
-# #   --------------------------
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2loss2 Logpdf is : -6242.351071278482
-# Sampling 100%|███████████████████████████████| Time: 0:05:09 
-
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2loss2 Logpdf is : -6242.351071278482
-# Sampling 100%|███████████████████████████████| Time: 0:04:47 
-
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2loss2 Logpdf is : -6242.351071278482
-# Sampling 100%|███████████████████████████████| Time: 0:04:25 
-# #   --------------
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2loss2 Logpdf is : -12497.32657780532
-# Sampling 100%|███████████████████████████████| Time: 0:06:47 
-
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2loss2 Logpdf is : -12497.32657780532
-# Sampling 100%|███████████████████████████████| Time: 0:05:54
-
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2loss2 Logpdf is : -12497.32657780532
-# Sampling 100%|███████████████████████████████| Time: 0:05:46
-# # ------------------------
-# # -----------------------
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -882.2934218498742
-# L2loss2 Logpdf is : -3118.0639515039957
-# Sampling 100%|███████████████████████████████| Time: 0:04:06
-
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -882.2934218498742
-# L2loss2 Logpdf is : -3118.0639515039957
-# Sampling 100%|███████████████████████████████| Time: 0:03:32
-
-# physics Logpdf is : -15740.509286661572
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -882.2934218498742
-# L2loss2 Logpdf is : -3118.0639515039957
-# Sampling 100%|███████████████████████████████| Time: 0:03:01 
-# # --------------------------
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -1411.1717435511828
-# L2loss2 Logpdf is : -6242.351071278482
-# Sampling 100%|███████████████████████████████| Time: 0:04:02
-
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -1411.1717435511828
-# L2loss2 Logpdf is : -6242.351071278482
-# Sampling 100%|███████████████████████████████| Time: 0:04:08
-
-# physics Logpdf is : -18864.79640643607
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -1411.1717435511828
-# L2loss2 Logpdf is : -6242.351071278482
-# Sampling 100%|███████████████████████████████| Time: 0:04:15
-# # ----------------------------
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -3240.067149411982
-# L2loss2 Logpdf is : -12497.32657780532
-# Sampling 100%|███████████████████████████████| Time: 0:05:37
-
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -3240.067149411982
-# L2loss2 Logpdf is : -12497.32657780532
-# Sampling 100%|███████████████████████████████| Time: 0:06:02
-
-# physics Logpdf is : -25119.77191296288
-# prior Logpdf is : -139.5069300318621
-# L2lossData Logpdf is : -3240.067149411982
-# L2loss2 Logpdf is : -12497.32657780532
-# Sampling 100%|███████████████████████████████| Time: 0:06:13
-
-using NeuralPDE, Lux, ModelingToolkit, Optimization, OptimizationOptimJL
-import ModelingToolkit: Interval, infimum, supremum
-
-using NeuralPDE, Flux, OptimizationOptimisers
-
-function diffeq(u, p, t)
-    u1, u2 = u
-    return [u2, p[1] + p[2] * sin(u1) + p[3] * u2]
-end
-p = [5, -10, -1.7]
-u0 = [-1.0, 7.0]
-tspan = (0.0, 10.0)
-prob = ODEProblem(ODEFunction(diffeq), u0, tspan, p)
-
-chainnew = Flux.Chain(Flux.Dense(1, 8, tanh), Flux.Dense(8, 8, tanh), Flux.Dense(8, 2)) |>
-           Flux.f64
-
-opt = OptimizationOptimisers.Adam(0.1)
-opt = Optimisers.ADAGrad(0.1)
-opt = Optimisers.AdaMax(0.01)
-algnew = NeuralPDE.NNODE(chainnew, opt)
-solution_new = solve(prob, algnew, verbose = true,
-    abstol = 1e-10, maxiters = 7000)
-u = reduce(hcat, solution_new.u)
-plot(solution_new.t, u[1, :])
-plot!(solution_new.t, u[2, :])
-
-algnew = NeuralPDE.BNNODE(chainnew, draw_samples = 200,
-    n_leapfrog = 30, progress = true)
-solution_new = solve(prob, algnew)
-
-@parameters t
-@variables u1(..), u2(..)
-D = Differential(t)
-eq = [D(u1(t)) ~ u2(t),
-    D(u2(t)) ~ 5 - 10 * sin(u1(t)) - 1.7 * u2(t)];
-
-import ModelingToolkit: Interval
-bcs = [u1(0) ~ -1, u2(0) ~ 7]
-domains = [t ∈ Interval(0.0, 10.0)]
-dt = 0.01
-
-input_ = length(domains) # number of dimensions
-n = 16
-chain = [Lux.Chain(Lux.Dense(input_, n, Lux.σ), Lux.Dense(n, n, Lux.σ), Lux.Dense(n, 1))
-         for _ in 1:2]
-
-@named pde_system = PDESystem(eq, bcs, domains, [t], [u1(t), u2(t)])
-
-strategy = NeuralPDE.GridTraining(dt)
-discretization = PhysicsInformedNN(chain, strategy)
-sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization)
-
-pde_loss_functions = sym_prob.loss_functions.pde_loss_functions
-bc_loss_functions = sym_prob.loss_functions.bc_loss_functions
-
-callback = function (p, l)
-    println("loss: ", l)
-    # println("pde_losses: ", map(l_ -> l_(p), pde_loss_functions))
-    # println("bcs_losses: ", map(l_ -> l_(p), bc_loss_functions))
-    return false
-end
-
-loss_functions = [pde_loss_functions; bc_loss_functions]
-
-function loss_function(θ, p)
-    sum(map(l -> l(θ), loss_functions))
-end
-
-f_ = OptimizationFunction(loss_function, Optimization.AutoZygote())
-prob = Optimization.OptimizationProblem(f_, sym_prob.flat_init_params)
-
-res = Optimization.solve(prob,
-    OptimizationOptimJL.BFGS();
-    callback = callback,
-    maxiters = 1000)
-phi = discretization.phi
\ No newline at end of file

From 88be1e77df2c8df66fb2516367b683e3e4d46df8 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Sun, 21 Jan 2024 00:01:34 +0530
Subject: [PATCH 062/107] update advancedHMC_MCMC.jl

---
 ...jl~90dfd764 (Better Posterior Formulation) | 4354 +++++++++++++++++
 test/BPINN_Tests.jl~f5b4f1cb (trying to sync) |  336 ++
 2 files changed, 4690 insertions(+)
 create mode 100644 test/BPINN_Tests.jl~90dfd764 (Better Posterior Formulation)
 create mode 100644 test/BPINN_Tests.jl~f5b4f1cb (trying to sync)

diff --git a/test/BPINN_Tests.jl~90dfd764 (Better Posterior Formulation) b/test/BPINN_Tests.jl~90dfd764 (Better Posterior Formulation)
new file mode 100644
index 0000000000..fa2f04073e
--- /dev/null
+++ b/test/BPINN_Tests.jl~90dfd764 (Better Posterior Formulation)	
@@ -0,0 +1,4354 @@
+# # Testing Code
+using Test, MCMCChains
+using ForwardDiff, Distributions, OrdinaryDiffEq
+using Flux, OptimizationOptimisers, AdvancedHMC, Lux
+using Statistics, Random, Functors, ComponentArrays
+using NeuralPDE, MonteCarloMeasurements
+
+# note that current testing bounds can be easily further tightened but have been inflated for support for Julia build v1
+# on latest Julia version it performs much better for below tests
+Random.seed!(100)
+
+# for sampled params->lux ComponentArray
+function vector_to_parameters(ps_new::AbstractVector, ps::NamedTuple)
+    @assert length(ps_new) == Lux.parameterlength(ps)
+    i = 1
+    function get_ps(x)
+        z = reshape(view(ps_new, i:(i + length(x) - 1)), size(x))
+        i += length(x)
+        return z
+    end
+    return Functors.fmap(get_ps, ps)
+end
+
+## PROBLEM-1 (WITHOUT PARAMETER ESTIMATION)
+linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π)
+linear = (u, p, t) -> cos(2 * π * t)
+tspan = (0.0, 2.0)
+u0 = 0.0
+prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan)
+p = prob.p
+
+# Numerical and Analytical Solutions: testing ahmc_bayesian_pinn_ode()
+ta = range(tspan[1], tspan[2], length = 300)
+u = [linear_analytic(u0, nothing, ti) for ti in ta]
+x̂ = collect(Float64, Array(u) + 0.02 * randn(size(u)))
+time = vec(collect(Float64, ta))
+physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# testing points for solve() call must match saveat(1/50.0) arg
+ta0 = range(tspan[1], tspan[2], length = 101)
+u1 = [linear_analytic(u0, nothing, ti) for ti in ta0]
+x̂1 = collect(Float64, Array(u1) + 0.02 * randn(size(u1)))
+time1 = vec(collect(Float64, ta0))
+physsol0_1 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
+
+chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64
+chainlux = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1))
+init1, re1 = destructure(chainflux)
+θinit, st = Lux.setup(Random.default_rng(), chainlux)
+
+fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chainflux,
+    draw_samples = 2500,
+    n_leapfrog = 30)
+
+fh_mcmc_chain2, fhsamples2, fhstats2 = ahmc_bayesian_pinn_ode(prob, chainlux,
+    draw_samples = 2500,
+    n_leapfrog = 30)
+
+# can change training strategies by adding this to call (Quadratuer and GridTraining show good results but stochastics sampling techniques perform bad)
+# strategy = QuadratureTraining(; quadrature_alg = QuadGKJL(),
+#     reltol = 1e-6,
+#     abstol = 1e-3, maxiters = 1000,
+#     batch = 0)
+
+alg = NeuralPDE.BNNODE(chainflux, draw_samples = 2500,
+    n_leapfrog = 30)
+sol1flux = solve(prob, alg)
+
+alg = NeuralPDE.BNNODE(chainlux, draw_samples = 2500,
+    n_leapfrog = 30)
+sol1lux = solve(prob, alg)
+
+# testing points
+t = time
+# Mean of last 500 sampled parameter's curves(flux and lux chains)[Ensemble predictions]
+out = re1.(fhsamples1[(end - 500):end])
+yu = collect(out[i](t') for i in eachindex(out))
+fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)]
+meanscurve1 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean
+
+θ = [vector_to_parameters(fhsamples1[i], θinit) for i in 2000:2500]
+luxar = [chainlux(t', θ[i], st)[1] for i in 1:500]
+luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
+meanscurve2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
+
+# --------------------- ahmc_bayesian_pinn_ode() call
+@test mean(abs.(x̂ .- meanscurve1)) < 0.05
+@test mean(abs.(physsol1 .- meanscurve1)) < 0.005
+@test mean(abs.(x̂ .- meanscurve2)) < 0.05
+@test mean(abs.(physsol1 .- meanscurve2)) < 0.005
+
+#--------------------- solve() call 
+@test mean(abs.(x̂1 .- sol1flux.ensemblesol[1])) < 0.05
+@test mean(abs.(physsol0_1 .- sol1flux.ensemblesol[1])) < 0.05
+@test mean(abs.(x̂1 .- sol1lux.ensemblesol[1])) < 0.05
+@test mean(abs.(physsol0_1 .- sol1lux.ensemblesol[1])) < 0.05
+
+## PROBLEM-1 (WITH PARAMETER ESTIMATION)
+linear_analytic = (u0, p, t) -> u0 + sin(p * t) / (p)
+linear = (u, p, t) -> cos(p * t)
+tspan = (0.0, 2.0)
+u0 = 0.0
+p = 2 * pi
+prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan, p)
+
+# Numerical and Analytical Solutions
+sol1 = solve(prob, Tsit5(); saveat = 0.01)
+u = sol1.u
+time = sol1.t
+
+# BPINN AND TRAINING DATASET CREATION(dataset must be defined only inside problem timespan!)
+ta = range(tspan[1], tspan[2], length = 25)
+u = [linear_analytic(u0, p, ti) for ti in ta]
+x̂ = collect(Float64, Array(u) .+ (0.2 .* Array(u) .* randn(size(u))))
+time = vec(collect(Float64, ta))
+dataset = [x̂, time]
+physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# testing points for solve call(saveat=1/50.0 ∴ at t = collect(eltype(saveat), prob.tspan[1]:saveat:prob.tspan[2] internally estimates)
+ta0 = range(tspan[1], tspan[2], length = 101)
+u1 = [linear_analytic(u0, p, ti) for ti in ta0]
+x̂1 = collect(Float64, Array(u1) + 0.2 * randn(size(u1)))
+time1 = vec(collect(Float64, ta0))
+physsol1_1 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
+
+using Plots, StatsPlots
+# plot(dataset[2], calderivatives(dataset)')
+yu = collect(prob.tspan[1]:(1 / 50.0):prob.tspan[2])
+plot(yu, [linear_analytic(u0, p, t) for t in yu])
+chainflux1 = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64
+chainlux1 = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1))
+init1, re1 = destructure(chainflux1)
+θinit, st = Lux.setup(Random.default_rng(), chainlux1)
+
+fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chainflux1,
+    dataset = dataset,
+    draw_samples = 2500,
+    physdt = 1 / 50.0f0,
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(9,
+            0.5),
+    ],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30)
+
+fh_mcmc_chain2, fhsamples2, fhstats2 = ahmc_bayesian_pinn_ode(prob, chainlux1,
+    dataset = dataset,
+    draw_samples = 2500,
+    physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30)
+
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 10.0),
+    l2std = [0.005], phystd = [0.01],
+    param = [Normal(11, 6)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30)
+# original paper (pure data 0 1)
+sol1flux = solve(prob, alg)
+sol1flux.estimated_ode_params
+# pure data method 1 1
+sol2flux = solve(prob, alg)
+sol2flux.estimated_ode_params
+# pure data method 1 0
+sol3flux = solve(prob, alg)
+sol3flux.estimated_ode_params
+# deri collocation
+sol4flux = solve(prob, alg)
+sol4flux.estimated_ode_params
+# collocation
+sol5flux = solve(prob, alg)
+sol5flux.estimated_ode_params
+# collocation + L2Data loss(at 9,0.5 1,2 gives same)
+sol6flux = solve(prob, alg)
+sol6flux.estimated_ode_params
+# 2500 iters
+sol7flux = solve(prob, alg)
+sol7flux.estimated_ode_params
+
+plotly()
+plot!(yu, sol1flux.ensemblesol[1])
+plot!(yu, sol2flux.ensemblesol[1])
+plot!(yu, sol3flux.ensemblesol[1])
+plot!(yu, sol4flux.ensemblesol[1])
+plot!(yu, sol5flux.ensemblesol[1])
+plot!(yu, sol6flux.ensemblesol[1])
+
+plot!(dataset[2], dataset[1])
+
+# plot!(sol4flux.ensemblesol[1])
+# plot!(sol5flux.ensemblesol[1])
+
+sol2flux.estimated_ode_params
+
+sol1flux.estimated_ode_params
+
+sol3flux.estimated_ode_params
+
+sol4flux.estimated_ode_params
+
+sol5flux.estimated_ode_params
+
+alg = NeuralPDE.BNNODE(chainlux1, dataset = dataset,
+    draw_samples = 2500,
+    physdt = 1 / 50.0f0,
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(9,
+            0.5),
+    ],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30)
+
+sol2lux = solve(prob, alg)
+
+# testing points
+t = time
+# Mean of last 500 sampled parameter's curves(flux and lux chains)[Ensemble predictions]
+out = re1.([fhsamples1[i][1:22] for i in 2000:2500])
+yu = collect(out[i](t') for i in eachindex(out))
+fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)]
+meanscurve1 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean
+
+θ = [vector_to_parameters(fhsamples2[i][1:(end - 1)], θinit) for i in 2000:2500]
+luxar = [chainlux1(t', θ[i], st)[1] for i in 1:500]
+luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
+meanscurve2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
+
+# --------------------- ahmc_bayesian_pinn_ode() call  
+@test mean(abs.(physsol1 .- meanscurve1)) < 0.15
+@test mean(abs.(physsol1 .- meanscurve2)) < 0.15
+
+# ESTIMATED ODE PARAMETERS (NN1 AND NN2)
+@test abs(p - mean([fhsamples2[i][23] for i in 2000:2500])) < abs(0.25 * p)
+@test abs(p - mean([fhsamples1[i][23] for i in 2000:2500])) < abs(0.25 * p)
+
+#-------------------------- solve() call  
+@test mean(abs.(physsol1_1 .- sol2flux.ensemblesol[1])) < 8e-2
+@test mean(abs.(physsol1_1 .- sol2lux.ensemblesol[1])) < 8e-2
+
+# ESTIMATED ODE PARAMETERS (NN1 AND NN2)
+@test abs(p - sol1flux.estimated_ode_params[1]) < abs(0.15 * p)
+@test abs(p - sol2lux.estimated_ode_params[1]) < abs(0.15 * p)
+
+## PROBLEM-2
+linear = (u, p, t) -> u / p + exp(t / p) * cos(t)
+tspan = (0.0, 10.0)
+u0 = 0.0
+p = -5.0
+prob = ODEProblem(linear, u0, tspan, p)
+linear_analytic = (u0, p, t) -> exp(t / p) * (u0 + sin(t))
+
+# SOLUTION AND CREATE DATASET
+sol = solve(prob, Tsit5(); saveat = 0.1)
+u = sol.u
+time = sol.t
+x̂ = u .+ (u .* 0.2) .* randn(size(u))
+dataset = [x̂, time]
+t = sol.t
+physsol1 = [linear_analytic(prob.u0, p, t[i]) for i in eachindex(t)]
+
+ta0 = range(tspan[1], tspan[2], length = 501)
+u1 = [linear_analytic(u0, p, ti) for ti in ta0]
+time1 = vec(collect(Float64, ta0))
+physsol2 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
+
+chainflux12 = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 6, tanh),
+    Flux.Dense(6, 1)) |> Flux.f64
+chainlux12 = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh), Lux.Dense(6, 1))
+init1, re1 = destructure(chainflux12)
+θinit, st = Lux.setup(Random.default_rng(), chainlux12)
+
+using Flux
+using Random
+
+function derivatives(chainflux, dataset)
+    loss(x, y) = Flux.mse(chainflux(x), y)
+    optimizer = Flux.Optimise.ADAM(0.01)
+    epochs = 2500
+    for epoch in 1:epochs
+        Flux.train!(loss, Flux.params(chainflux), [(dataset[2]', dataset[1]')], optimizer)
+    end
+    getgradient(chainflux, dataset)
+end
+
+function getgradient(chainflux, dataset)
+    return (chainflux(dataset[end]' .+ sqrt(eps(eltype(Float64)))) .-
+            chainflux(dataset[end]')) ./
+           sqrt(eps(eltype(dataset[end][1])))
+end
+
+ans = derivatives(chainflux12, dataset)
+
+init3, re = destructure(chainflux12)
+init2 == init1
+init3 == init2
+plot!(dataset[end], ans')
+plot!(dataset[end], chainflux12(dataset[end]')')
+
+ars = getgradient(chainflux12, dataset)
+
+plot!(dataset[end], ars')
+
+fh_mcmc_chainflux12, fhsamplesflux12, fhstatsflux12 = ahmc_bayesian_pinn_ode(prob,
+    chainflux12,
+    draw_samples = 1500,
+    l2std = [0.03],
+    phystd = [
+        0.03],
+    priorsNNw = (0.0,
+        10.0),
+    n_leapfrog = 30)
+
+fh_mcmc_chainflux22, fhsamplesflux22, fhstatsflux22 = ahmc_bayesian_pinn_ode(prob,
+    chainflux12,
+    dataset = dataset,
+    draw_samples = 1500,
+    l2std = [0.03],
+    phystd = [
+        0.03,
+    ],
+    priorsNNw = (0.0,
+        10.0),
+    param = [
+        Normal(-7,
+            4),
+    ],
+    n_leapfrog = 30)
+
+fh_mcmc_chainlux12, fhsampleslux12, fhstatslux12 = ahmc_bayesian_pinn_ode(prob, chainlux12,
+    draw_samples = 1500,
+    l2std = [0.03],
+    phystd = [0.03],
+    priorsNNw = (0.0,
+        10.0),
+    n_leapfrog = 30)
+
+fh_mcmc_chainlux22, fhsampleslux22, fhstatslux22 = ahmc_bayesian_pinn_ode(prob, chainlux12,
+    dataset = dataset,
+    draw_samples = 1500,
+    l2std = [0.03],
+    phystd = [0.03],
+    priorsNNw = (0.0,
+        10.0),
+    param = [
+        Normal(-7,
+            4),
+    ],
+    n_leapfrog = 30)
+
+alg1 = NeuralPDE.BNNODE(chainflux12,
+    dataset = dataset,
+    draw_samples = 500,
+    l2std = [0.01],
+    phystd = [
+        0.03,
+    ],
+    priorsNNw = (0.0,
+        10.0),
+    param = [
+        Normal(-7,
+            4),
+    ],
+    n_leapfrog = 30, progress = true)
+
+# original paper (pure data 0 1)
+sol1flux_pestim = solve(prob, alg1)
+sol1flux_pestim.estimated_ode_params
+# pure data method 1 1
+sol2flux_pestim = solve(prob, alg1)
+sol2flux_pestim.estimated_ode_params
+# pure data method 1 0
+sol3flux_pestim = solve(prob, alg1)
+sol3flux_pestim.estimated_ode_params
+# deri collocation
+sol4flux_pestim = solve(prob, alg1)
+sol4flux_pestim.estimated_ode_params
+# collocation
+sol5flux_pestim = solve(prob, alg1)
+sol5flux_pestim.estimated_ode_params
+# collocation + L2Data loss(at 9,0.5 1,2 gives same)
+sol6flux_pestim = solve(prob, alg1)
+sol6flux_pestim.estimated_ode_params
+
+using Plots, StatsPlots
+ars = collect(prob.tspan[1]:(1 / 50.0):prob.tspan[2])
+plot(time, u)
+plot!(ars, sol1flux_pestim.ensemblesol[1])
+plot!(ars, sol2flux_pestim.ensemblesol[1])
+plot!(ars, sol3flux_pestim.ensemblesol[1])
+plot!(ars, sol4flux_pestim.ensemblesol[1])
+plot!(ars, sol5flux_pestim.ensemblesol[1])
+plot!(ars, sol6flux_pestim.ensemblesol[1])
+
+sol3flux_pestim.estimated_ode_params
+
+sol4flux_pestim.estimated_ode_params
+
+sol5flux_pestim.estimated_ode_params
+
+sol6flux_pestim.estimated_ode_params
+
+ars = collect(prob.tspan[1]:(1 / 50.0):prob.tspan[2])
+
+init, re1 = destructure(chainflux12)
+init
+init1
+alg = NeuralPDE.BNNODE(chainlux12,
+    dataset = dataset,
+    draw_samples = 1500,
+    l2std = [0.03],
+    phystd = [0.03],
+    priorsNNw = (0.0,
+        10.0),
+    param = [
+        Normal(-7,
+            4),
+    ],
+    n_leapfrog = 30)
+
+sol3lux_pestim = solve(prob, alg)
+
+# testing timepoints
+t = sol.t
+#------------------------------ ahmc_bayesian_pinn_ode() call 
+# Mean of last 500 sampled parameter's curves(flux chains)[Ensemble predictions]
+out = re1.([fhsamplesflux12[i][1:61] for i in 1000:1500])
+yu = [out[i](t') for i in eachindex(out)]
+fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)]
+meanscurve1_1 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean
+
+out = re1.([fhsamplesflux22[i][1:61] for i in 1000:1500])
+yu = [out[i](t') for i in eachindex(out)]
+fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)]
+meanscurve1_2 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean
+
+@test mean(abs.(sol.u .- meanscurve1_1)) < 1e-2
+@test mean(abs.(physsol1 .- meanscurve1_1)) < 1e-2
+@test mean(abs.(sol.u .- meanscurve1_2)) < 5e-2
+@test mean(abs.(physsol1 .- meanscurve1_2)) < 5e-2
+
+# estimated parameters(flux chain)
+param1 = mean(i[62] for i in fhsamplesflux22[1000:1500])
+@test abs(param1 - p) < abs(0.3 * p)
+
+# Mean of last 500 sampled parameter's curves(lux chains)[Ensemble predictions]
+θ = [vector_to_parameters(fhsampleslux12[i], θinit) for i in 1000:1500]
+luxar = [chainlux12(t', θ[i], st)[1] for i in 1:500]
+luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
+meanscurve2_1 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
+
+θ = [vector_to_parameters(fhsampleslux22[i][1:(end - 1)], θinit) for i in 1000:1500]
+luxar = [chainlux12(t', θ[i], st)[1] for i in 1:500]
+luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
+meanscurve2_2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
+
+@test mean(abs.(sol.u .- meanscurve2_1)) < 1e-1
+@test mean(abs.(physsol1 .- meanscurve2_1)) < 1e-1
+@test mean(abs.(sol.u .- meanscurve2_2)) < 5e-2
+@test mean(abs.(physsol1 .- meanscurve2_2)) < 5e-2
+
+# estimated parameters(lux chain)
+param1 = mean(i[62] for i in fhsampleslux22[1000:1500])
+@test abs(param1 - p) < abs(0.3 * p)
+
+#-------------------------- solve() call 
+# (flux chain)
+@test mean(abs.(physsol2 .- sol3flux_pestim.ensemblesol[1])) < 0.15
+# estimated parameters(flux chain)
+param1 = sol3flux_pestim.estimated_ode_params[1]
+@test abs(param1 - p) < abs(0.45 * p)
+
+# (lux chain)
+@test mean(abs.(physsol2 .- sol3lux_pestim.ensemblesol[1])) < 0.15
+# estimated parameters(lux chain)
+param1 = sol3lux_pestim.estimated_ode_params[1]
+@test abs(param1 - p) < abs(0.45 * p)
+
+using Plots, StatsPlots
+using NoiseRobustDifferentiation, Weave, DataInterpolations
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood
+# # 25 points 
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+#     draw_samples = 1500, physdt = 1 / 50.0f0, phystd = [0.01],
+#     l2std = [0.01],
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux1 = solve(prob, alg)
+# sol2flux1.estimated_ode_params[1] #6.41722 Particles{Float64, 1}, 6.02404 Particles{Float64, 1}
+# sol2flux2 = solve(prob, alg)
+# sol2flux2.estimated_ode_params[1] #6.42782 Particles{Float64, 1}, 6.07509 Particles{Float64, 1}
+# sol2flux3 = solve(prob, alg)
+# sol2flux3.estimated_ode_params[1] #6.42782 Particles{Float64, 1}, 6.00825 Particles{Float64, 1}
+
+# # 50 points 
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux11 = solve(prob, alg)
+# sol2flux11.estimated_ode_params[1] #5.71268 Particles{Float64, 1}, 6.07242 Particles{Float64, 1}
+# sol2flux22 = solve(prob, alg)
+# sol2flux22.estimated_ode_params[1] #5.74599 Particles{Float64, 1}, 6.04837 Particles{Float64, 1}
+# sol2flux33 = solve(prob, alg)
+# sol2flux33.estimated_ode_params[1] #5.74599 Particles{Float64, 1}, 6.02838 Particles{Float64, 1}
+
+# # 100 points 
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux111 = solve(prob, alg)
+# sol2flux111.estimated_ode_params[1] #6.59097 Particles{Float64, 1}, 5.89384 Particles{Float64, 1}
+# sol2flux222 = solve(prob, alg)
+# sol2flux222.estimated_ode_params[1] #6.62813 Particles{Float64, 1}, 5.88216 Particles{Float64, 1}
+# sol2flux333 = solve(prob, alg)
+# sol2flux333.estimated_ode_params[1] #6.62813 Particles{Float64, 1}, 5.85327 Particles{Float64, 1}
+
+# # ----------------------------------------------------------
+# # physdt=1/20, full likelihood cdm
+# # 25 points 
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux1_cdm = solve(prob, alg)
+# sol2flux1_cdm.estimated_ode_params[1]# 6.50506 Particles{Float64, 1} ,6.38963 Particles{Float64, 1}
+# sol2flux2_cdm = solve(prob, alg)
+# sol2flux2_cdm.estimated_ode_params[1] #6.50032 Particles{Float64, 1} ,6.39817 Particles{Float64, 1}
+# sol2flux3_cdm = solve(prob, alg)
+# sol2flux3_cdm.estimated_ode_params[1] #6.50032 Particles{Float64, 1} ,6.36296 Particles{Float64, 1}
+
+# # 50 points 
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux11_cdm = solve(prob, alg)
+# sol2flux11_cdm.estimated_ode_params[1] #6.52951 Particles{Float64, 1},5.15621 Particles{Float64, 1}
+# sol2flux22_cdm = solve(prob, alg)
+# sol2flux22_cdm.estimated_ode_params[1] #6.54988 Particles{Float64, 1},5.16363 Particles{Float64, 1}
+# sol2flux33_cdm = solve(prob, alg)
+# sol2flux33_cdm.estimated_ode_params[1] #6.54988 Particles{Float64, 1},5.15591 Particles{Float64, 1}
+
+# # 100 points 
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux111_cdm = solve(prob, alg)
+# sol2flux111_cdm.estimated_ode_params[1] #6.74338 Particles{Float64, 1}, 9.72422 Particles{Float64, 1}
+# sol2flux222_cdm = solve(prob, alg)
+# sol2flux222_cdm.estimated_ode_params[1] #6.72642 Particles{Float64, 1}, 9.71991 Particles{Float64, 1}
+# sol2flux333_cdm = solve(prob, alg)
+# sol2flux333_cdm.estimated_ode_params[1] #6.72642 Particles{Float64, 1}, 9.75045 Particles{Float64, 1}
+
+# --------------------------------------------------------------------------------------
+#                              NEW SERIES OF TESTS (IN ORDER OF EXECUTION)
+#  -------------------------------------------------------------------------------------
+# original paper implementaion
+# 25 points
+ta = range(tspan[1], tspan[2], length = 25)
+u = [linear_analytic(u0, p, ti) for ti in ta]
+x̂ = collect(Float64, u .+ 0.05 * randn(size(u)))
+time = vec(collect(Float64, ta))
+dataset1 = [x̂, time]
+physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+# scatter!(time, u)
+# dataset
+# scatter!(dataset1[2], dataset1[1])
+# plot(time, physsol1)
+
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux1_normal = solve(prob, alg)
+sol2flux1_normal.estimated_ode_params[1]  #7.70593 Particles{Float64, 1}, 6.36096 Particles{Float64, 1} | 6.45865 Particles{Float64, 1}
+sol2flux2_normal = solve(prob, alg)
+sol2flux2_normal.estimated_ode_params[1] #6.66347 Particles{Float64, 1}, 6.36974 Particles{Float64, 1} | 6.45865 Particles{Float64, 1}
+sol2flux3_normal = solve(prob, alg)
+sol2flux3_normal.estimated_ode_params[1] #6.84827 Particles{Float64, 1}, 6.29555 Particles{Float64, 1} | 6.39947 Particles{Float64, 1}
+
+# 50 points
+ta = range(tspan[1], tspan[2], length = 50)
+u = [linear_analytic(u0, p, ti) for ti in ta]
+x̂ = collect(Float64, Array(u) + 0.05 * randn(size(u)))
+time = vec(collect(Float64, ta))
+dataset2 = [x̂, time]
+physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux11_normal = solve(prob, alg)
+sol2flux11_normal.estimated_ode_params[1] #7.83577 Particles{Float64, 1},6.24652 Particles{Float64, 1} | 6.34495 Particles{Float64, 1}
+sol2flux22_normal = solve(prob, alg)
+sol2flux22_normal.estimated_ode_params[1] #6.49477 Particles{Float64, 1},6.2118 Particles{Float64, 1} | 6.32476 Particles{Float64, 1}
+sol2flux33_normal = solve(prob, alg)
+sol2flux33_normal.estimated_ode_params[1] #6.47421 Particles{Float64, 1},6.33687 Particles{Float64, 1} | 6.2448 Particles{Float64, 1}
+
+# 100 points
+ta = range(tspan[1], tspan[2], length = 100)
+u = [linear_analytic(u0, p, ti) for ti in ta]
+x̂ = collect(Float64, Array(u) + 0.05 * randn(size(u)))
+time = vec(collect(Float64, ta))
+dataset3 = [x̂, time]
+physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux111_normal = solve(prob, alg)
+sol2flux111_normal.estimated_ode_params[1] #5.96604 Particles{Float64, 1},5.99588 Particles{Float64, 1} | 6.19805 Particles{Float64, 1}
+sol2flux222_normal = solve(prob, alg)
+sol2flux222_normal.estimated_ode_params[1] #6.05432 Particles{Float64, 1},6.0768 Particles{Float64, 1} | 6.22948 Particles{Float64, 1}
+sol2flux333_normal = solve(prob, alg)
+sol2flux333_normal.estimated_ode_params[1] #6.08856 Particles{Float64, 1},5.94819 Particles{Float64, 1} | 6.2551 Particles{Float64, 1}
+
+# LOTKA VOLTERRA CASE 
+function lotka_volterra(u, p, t)
+    # Model parameters.
+    α, β, γ, δ = p
+    # Current state.
+    x, y = u
+
+    # Evaluate differential equations.
+    dx = (α - β * y) * x # prey
+    dy = (δ * x - γ) * y # predator
+
+    return [dx, dy]
+end
+
+# initial-value problem.
+u01 = [1.0, 1.0]
+p1 = [1.5, 1.0, 3.0, 1.0]
+tspan1 = (0.0, 6.0)
+prob1 = ODEProblem(lotka_volterra, u01, tspan1, p1)
+
+# chainlux = Lux.Chain(Lux.Dense(1, 7, Lux.tanh), Lux.Dense(7, 7, Lux.tanh), Lux.Dense(7, 2))
+chainflux1 = Flux.Chain(Flux.Dense(1, 8, tanh), Flux.Dense(8, 8, tanh), Flux.Dense(8, 2))
+
+#testing timepoints must match keyword arg `saveat`` timepoints of solve() call
+t1 = collect(Float64, prob1.tspan[1]:(1 / 50.0):prob1.tspan[2])
+
+# --------------------------------------------------------------------------
+# original paper implementaion lotka volterra
+# 31 points  
+solution1 = solve(prob1, Tsit5(); saveat = 0.1)
+time1 = solution1.t
+physsol1_1 = solution1.u
+u1 = hcat(solution1.u...)
+x1 = u1[1, :] .+ 0.3 .* u1[1, :] .* randn(length(u1[1, :]))
+y1 = u1[2, :] .+ 0.3 .* u1[2, :] .* randn(length(u1[2, :]))
+dataset2_1 = [x1, y1, time1]
+plot(dataset2_1[end], dataset2_1[1])
+plot!(dataset2_1[end], dataset2_1[2])
+plot!(time1, u1[1, :])
+plot!(time1, u1[2, :])
+
+alg1 = NeuralPDE.BNNODE(chainflux1,
+    dataset = dataset2_1,
+    draw_samples = 1000,
+    physdt = 1 / 20.0,
+    l2std = [
+        0.2,
+        0.2,
+    ],
+    phystd = [
+        0.5,
+        0.5,
+    ],
+    priorsNNw = (0.0,
+        10.0),
+    param = [
+        Normal(4,
+            3),
+        Normal(-2,
+            4),
+        Normal(0,
+            5),
+        Normal(2.5,
+            2)],
+    n_leapfrog = 30, progress = true)
+
+# original paper (pure data 0 1)
+sol1flux1_lotka = solve(prob1, alg1)
+sol1flux1_lotka.estimated_ode_params
+# pure data method 1 1
+sol2flux1_lotka = solve(prob1, alg1)
+sol2flux1_lotka.estimated_ode_params
+# pure data method 1 0
+sol3flux1_lotka = solve(prob1, alg1)
+sol3flux1_lotka.estimated_ode_params
+# deri collocation
+sol4flux1_lotka = solve(prob1, alg1)
+sol4flux1_lotka.estimated_ode_params
+# collocation
+sol5flux1_lotka = solve(prob1, alg1)
+sol5flux1_lotka.estimated_ode_params
+# collocation + L2Data loss(at 9,0.5 1,2 gives same)
+sol6flux1_lotka = solve(prob1, alg1)
+sol6flux1_lotka.estimated_ode_params
+
+sol7flux1_lotka = solve(prob1, alg1)
+sol7flux1_lotka.estimated_ode_params
+
+using Plots, StatsPlots
+plot(dataset2_1[3], u1[1, :])
+plot!(dataset2_1[3], u1[2, :])
+plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol5flux1_normal.ensemblesol[2])
+plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]),
+    sol1flux1_normal.ensemblesol[1],
+    legend = :outerbottomleft)
+sol1flux2_normal = solve(prob1, alg1)
+sol1flux2_normal.estimated_ode_params  #|
+sol1flux3_normal = solve(prob1, alg1)
+sol1flux3_normal.estimated_ode_params  #|
+sol1flux4_normal = solve(prob1, alg1)
+sol1flux4_normal.estimated_ode_params
+
+plotly()
+plot!(title = "yuh")
+plot!(dataset2_1[3], dataset2_1[1])
+plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol1flux1_normal.ensemblesol[1])
+plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol1flux2_normal.ensemblesol[1])
+plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol1flux3_normal.ensemblesol[2])
+plot!(collect(prob1.tspan[1]:(1 / 50.0):prob1.tspan[2]), sol1flux4_normal.ensemblesol[1])
+plot(time1, u1[1, :])
+plot!(time1, u1[2, :])
+
+ars = chainflux1(dataset2_1[end]')
+plot(ars[1, :])
+plot!(ars[2, :])
+
+function calculate_derivatives(dataset)
+    u = dataset[1]
+    u1 = dataset[2]
+    t = dataset[end]
+    # control points
+    n = Int(floor(length(t) / 10))
+    # spline for datasetvalues(solution) 
+    # interp = BSplineApprox(u, t, 4, 10, :Uniform, :Uniform)
+    interp = CubicSpline(u, t)
+    interp1 = CubicSpline(u1, t)
+    # derrivatives interpolation
+    dx = t[2] - t[1]
+    time = collect(t[1]:dx:t[end])
+    smoothu = [interp(i) for i in time]
+    smoothu1 = [interp1(i) for i in time]
+    # derivative of the spline (must match function derivative) 
+    û = tvdiff(smoothu, 20, 0.5, dx = dx, ε = 1)
+    û1 = tvdiff(smoothu1, 20, 0.5, dx = dx, ε = 1)
+    # tvdiff(smoothu, 100, 0.035, dx = dx, ε = 1)
+    # FDM
+    # û1 = diff(u) / dx
+    # dataset[1] and smoothu are almost equal(rounding errors)
+    return û, û1
+    # return 1
+end
+
+ar = calculate_derivatives(dataset2_1)
+plot(ar[1])
+plot!(ar[2])
+
+# 61 points
+solution1 = solve(prob1, Tsit5(); saveat = 0.1)
+time1 = solution1.t
+physsol1_1 = solution1.u
+u1 = hcat(solution1.u...)
+x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+dataset2_2 = [x1, y1, time1]
+
+alg1 = NeuralPDE.BNNODE(chainlux,
+    dataset = dataset2_2,
+    draw_samples = 1000,
+    l2std = [
+        0.1,
+        0.1,
+    ],
+    phystd = [
+        0.1,
+        0.1,
+    ],
+    priorsNNw = (0.0,
+        5.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol1flux11_normal = solve(prob1, alg1)
+sol1flux11_normal.estimated_ode_params #|
+sol1flux22_normal = solve(prob1, alg1)
+sol1flux22_normal.estimated_ode_params #|
+sol1flux33_normal = solve(prob1, alg1)
+sol1flux33_normal.estimated_ode_params #|
+
+# 121 points
+solution1 = solve(prob1, Tsit5(); saveat = 0.05)
+time1 = solution1.t
+physsol1_1 = solution1.u
+u1 = hcat(solution1.u...)
+x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+dataset2_3 = [x1, y1, time1]
+
+alg1 = NeuralPDE.BNNODE(chainlux,
+    dataset = dataset2_3,
+    draw_samples = 1000,
+    l2std = [
+        0.1,
+        0.1,
+    ],
+    phystd = [
+        0.1,
+        0.1,
+    ],
+    priorsNNw = (0.0,
+        5.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol1flux111_normal = solve(prob1, alg1)
+sol1flux111_normal.estimated_ode_params #|
+sol1flux222_normal = solve(prob1, alg1)
+sol1flux222_normal.estimated_ode_params #|
+sol1flux333_normal = solve(prob1, alg1)
+sol1flux333_normal.estimated_ode_params #| 
+
+# -------------------------------------------------------------------- 
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -640.4155412187399
+# Sampling 100%|███████████████████████████████| Time: 0:02:30
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -640.4155412187399
+# Sampling 100%|███████████████████████████████| Time: 0:01:54
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -640.4155412187399
+# Sampling 100%|███████████████████████████████| Time: 0:01:59
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1198.9147562830894
+# Sampling 100%|███████████████████████████████| Time: 0:02:44
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1198.9147562830894
+# Sampling 100%|███████████████████████████████| Time: 0:02:41
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1198.9147562830894
+# Sampling 100%|███████████████████████████████| Time: 0:02:41
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -2473.741390504424
+# Sampling 100%|███████████████████████████████| Time: 0:03:52
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -2473.741390504424
+# Sampling 100%|███████████████████████████████| Time: 0:03:49
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -2473.741390504424
+# Sampling 100%|███████████████████████████████| Time: 0:03:50
+
+# # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
+# physics Logpdf is : -6.659143464386241e7
+# prior Logpdf is : -150.30074579848434
+# L2lossData Logpdf is : -6.03075717462954e6
+# Sampling 100%|███████████████████████████████| Time: 0:04:54
+
+# physics Logpdf is : -8.70012053004202e8
+# prior Logpdf is : -150.3750892952511
+# L2lossData Logpdf is : -6.967914805207133e6
+# Sampling 100%|███████████████████████████████| Time: 0:05:09
+
+# physics Logpdf is : -5.417241281343099e7
+# prior Logpdf is : -150.52079555737976
+# L2lossData Logpdf is : -4.195953436792884e6
+# Sampling 100%|███████████████████████████████| Time: 0:05:01
+
+# physics Logpdf is : -4.579552981943833e8
+# prior Logpdf is : -150.30491731974283
+# L2lossData Logpdf is : -8.595475827260146e6
+# Sampling 100%|███████████████████████████████| Time: 0:06:08
+
+# physics Logpdf is : -1.989281834955769e7
+# prior Logpdf is : -150.16009042727543
+# L2lossData Logpdf is : -1.121270659669029e7
+# Sampling 100%|███████████████████████████████| Time: 0:05:38
+
+# physics Logpdf is : -8.683829147264534e8
+# prior Logpdf is : -150.37824872259102
+# L2lossData Logpdf is : -1.0887662888035845e7
+# Sampling 100%|███████████████████████████████| Time: 0:05:50
+
+# physics Logpdf is : -3.1944760610332566e8
+# prior Logpdf is : -150.33610348737565
+# L2lossData Logpdf is : -1.215458786744478e7
+# Sampling 100%|███████████████████████████████| Time: 0:10:50
+
+# physics Logpdf is : -3.2884572300341567e6
+# prior Logpdf is : -150.21002268156343
+# L2lossData Logpdf is : -1.102536731511176e7
+# Sampling 100%|███████████████████████████████| Time: 0:09:53
+
+# physics Logpdf is : -5.31293521002414e8
+# prior Logpdf is : -150.20948536040126
+# L2lossData Logpdf is : -1.818717239584132e7
+# Sampling 100%|███████████████████████████████| Time: 0:08:53
+
+# ----------------------------------------------------------
+# Full likelihood no l2 only new L22(NN gradients)
+# 25 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux1_new = solve(prob, alg)
+sol2flux1_new.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} | 6.21662 Particles{Float64, 1}
+sol2flux2_new = solve(prob, alg)
+sol2flux2_new.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1} | 7.14238 Particles{Float64, 1}
+sol2flux3_new = solve(prob, alg)
+sol2flux3_new.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1} | 6.79159 Particles{Float64, 1}
+
+# 50 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux11_new = solve(prob, alg)
+sol2flux11_new.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1} | 5.33467 Particles{Float64, 1}
+sol2flux22_new = solve(prob, alg)
+sol2flux22_new.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1} | 6.52419 Particles{Float64, 1}
+sol2flux33_new = solve(prob, alg)
+sol2flux33_new.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} | 5.36921 Particles{Float64, 1}
+
+# 100 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux111_new = solve(prob, alg)
+sol2flux111_new.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1} | 6.45333 Particles{Float64, 1}
+sol2flux222_new = solve(prob, alg)
+sol2flux222_new.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1} | 4.64417 Particles{Float64, 1}
+sol2flux333_new = solve(prob, alg)
+sol2flux333_new.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1} | 5.88037 Particles{Float64, 1}
+# ---------------------------------------------------------------------------
+
+# ----------------------------------------------------------
+# Full likelihood  l2 + new L22(NN gradients)
+# 25 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux1_new_all = solve(prob, alg)
+sol2flux1_new_all.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} | 6.4358 Particles{Float64, 1}
+sol2flux2_new_all = solve(prob, alg)
+sol2flux2_new_all.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1} | 6.52449 Particles{Float64, 1}
+sol2flux3_new_all = solve(prob, alg)
+sol2flux3_new_all.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1} | 6.34188 Particles{Float64, 1}
+
+# 50 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux11_new_all = solve(prob, alg)
+sol2flux11_new_all.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1} | 6.37889 Particles{Float64, 1}
+sol2flux22_new_all = solve(prob, alg)
+sol2flux22_new_all.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1} | 6.34747 Particles{Float64, 1}
+sol2flux33_new_all = solve(prob, alg)
+sol2flux33_new_all.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} | 6.39699 Particles{Float64, 1}
+
+# 100 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux111_new_all = solve(prob, alg)
+sol2flux111_new_all.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1} | 6.24327 Particles{Float64, 1}
+sol2flux222_new_all = solve(prob, alg)
+sol2flux222_new_all.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1} | 6.23928 Particles{Float64, 1}
+sol2flux333_new_all = solve(prob, alg)
+sol2flux333_new_all.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1} | 6.2145 Particles{Float64, 1}
+
+# ---------------------------------------------------------------------------
+# Full likelihood  l2 + new L22(dataset gradients) lotka volterra
+# 36 points 
+alg1 = NeuralPDE.BNNODE(chainlux,
+    dataset = dataset2_1,
+    draw_samples = 1000,
+    l2std = [
+        0.01,
+        0.01,
+    ],
+    phystd = [
+        0.01,
+        0.01,
+    ],
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol1flux1_new_all = solve(prob1, alg1)
+sol1flux1_new_all.estimated_ode_params[1]  #|
+sol1flux2_new_all = solve(prob1, alg1)
+sol1flux2_new_all.estimated_ode_params[1] #|
+sol1flux3_new_all = solve(prob1, alg1)
+sol1flux3_new_all.estimated_ode_params[1] #|
+
+# 61 points 
+alg1 = NeuralPDE.BNNODE(chainlux,
+    dataset = dataset2_2,
+    draw_samples = 1000,
+    l2std = [
+        0.01,
+        0.01,
+    ],
+    phystd = [
+        0.01,
+        0.01,
+    ],
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol1flux11_new_all = solve(prob1, alg1)
+sol1flux11_new_all.estimated_ode_params[1] #|
+sol1flux22_new_all = solve(prob1, alg1)
+sol1flux22_new_all.estimated_ode_params[1] #|
+sol1flux33_new_all = solve(prob1, alg1)
+sol1flux33_new_all.estimated_ode_params[1] #|
+
+# 121 points 
+alg1 = NeuralPDE.BNNODE(chainlux,
+    dataset = dataset2_3,
+    draw_samples = 1000,
+    l2std = [
+        0.01,
+        0.01,
+    ],
+    phystd = [
+        0.01,
+        0.01,
+    ],
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol1flux111_new_all = solve(prob1, alg1)
+sol1flux111_new_all.estimated_ode_params[1] #|
+sol1flux222_new_all = solve(prob1, alg1)
+sol1flux222_new_all.estimated_ode_params[1] #|
+sol1flux333_new_all = solve(prob1, alg1)
+sol1flux333_new_all.estimated_ode_params[1] #|
+# -------------------------------------------------------------------- 
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -640.4155412187399
+# L2loss2 Logpdf is : -757.9047847584478
+# Sampling 100%|███████████████████████████████| Time: 0:02:32
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -640.4155412187399
+# L2loss2 Logpdf is : -757.9047847584478
+# Sampling 100%|███████████████████████████████| Time: 0:02:19
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -640.4155412187399
+# L2loss2 Logpdf is : -757.9047847584478
+# Sampling 100%|███████████████████████████████| Time: 0:02:31
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1198.9147562830894
+# L2loss2 Logpdf is : -1517.3653615845183
+# Sampling 100%|███████████████████████████████| Time: 0:03:45
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1198.9147562830894
+# L2loss2 Logpdf is : -1517.3653615845183
+# Sampling 100%|███████████████████████████████| Time: 0:03:20
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1198.9147562830894
+# L2loss2 Logpdf is : -1517.3653615845183
+# Sampling 100%|███████████████████████████████| Time: 0:03:20
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -2473.741390504424
+# L2loss2 Logpdf is : -3037.8868319811254
+# Sampling 100%|███████████████████████████████| Time: 0:04:57
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -2473.741390504424
+# L2loss2 Logpdf is : -3037.8868319811254
+# Sampling 100%|███████████████████████████████| Time: 0:05:26
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -2473.741390504424
+# L2loss2 Logpdf is : -3037.8868319811254
+# Sampling 100%|███████████████████████████████| Time: 0:05:01
+
+# ----------------------------------------------------------
+# Full likelihood  l2 + new L22(dataset gradients)
+# 25 points
+# 1*,2*,  
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux1_newdata_all = solve(prob, alg)
+sol2flux1_newdata_all.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} | 5.73072 Particles{Float64, 1}
+sol2flux2_newdata_all = solve(prob, alg)
+sol2flux2_newdata_all.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1} | 5.71597 Particles{Float64, 1}
+sol2flux3_newdata_all = solve(prob, alg)
+sol2flux3_newdata_all.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1} | 5.7313 Particles{Float64, 1}
+
+# 50 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux11_newdata_all = solve(prob, alg)
+sol2flux11_newdata_all.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1} | 6.07153 Particles{Float64, 1}
+sol2flux22_newdata_all = solve(prob, alg)
+sol2flux22_newdata_all.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1} | 6.06623 Particles{Float64, 1}
+sol2flux33_newdata_all = solve(prob, alg)
+sol2flux33_newdata_all.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} | 6.12748 Particles{Float64, 1}
+
+# 100 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux111_newdata_all = solve(prob, alg)
+sol2flux111_newdata_all.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1} | 6.26222 Particles{Float64, 1}
+sol2flux222_newdata_all = solve(prob, alg)
+sol2flux222_newdata_all.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1} | 5.86494 Particles{Float64, 1}
+sol2flux333_newdata_all = solve(prob, alg)
+sol2flux333_newdata_all.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1} |  
+
+# ---------------------------------------------------------------------------
+
+# LOTKA VOLTERRA CASE
+using Plots, StatsPlots
+function lotka_volterra(u, p, t)
+    # Model parameters.
+    α, β, γ, δ = p
+    # Current state.
+    x, y = u
+
+    # Evaluate differential equations.
+    dx = (α - β * y) * x # prey
+    dy = (δ * x - γ) * y # predator
+
+    return [dx, dy]
+end
+
+# initial-value problem.
+u01 = [1.0, 1.0]
+p1 = [1.5, 1.0, 3.0, 1.0]
+tspan1 = (0.0, 6.0)
+prob1 = ODEProblem(lotka_volterra, u01, tspan1, p1)
+
+chainlux = Lux.Chain(Lux.Dense(1, 6, Lux.tanh), Lux.Dense(6, 6, Lux.tanh), Lux.Dense(6, 2))
+
+#testing timepoints must match keyword arg `saveat`` timepoints of solve() call
+t1 = collect(Float64, prob.tspan[1]:(1 / 50.0):prob.tspan[2])
+
+# --------------------------------------------------------------------------
+# original paper implementaion
+# 25 points  
+solution1 = solve(prob1, Tsit5(); saveat = 0.2)
+time1 = solution1.t
+physsol1_1 = solution1.u
+u1 = hcat(solution1.u...)
+x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+dataset2_1 = [x1, y1, time1]
+
+plot(time1, u1[1, :])
+plot!(time1, u1[2, :])
+scatter!(dataset2_1[3], dataset2_1[1])
+scatter!(dataset2_1[3], dataset2_1[2])
+
+alg1 = NeuralPDE.BNNODE(chainlux,
+    dataset = dataset2_1,
+    draw_samples = 1000,
+    l2std = [
+        0.01,
+        0.01,
+    ],
+    phystd = [
+        0.01,
+        0.01,
+    ],
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol1flux1_normal = solve(prob1, alg1)
+sol1flux1_normal.estimated_ode_params[1]  #|
+sol1flux2_normal = solve(prob1, alg1)
+sol1flux2_normal.estimated_ode_params[1] #|
+sol1flux3_normal = solve(prob1, alg1)
+sol1flux3_normal.estimated_ode_params[1] #|
+
+# 50 points
+solution1 = solve(prob1, Tsit5(); saveat = 0.05)
+time1 = solution1.t
+physsol1_1 = solution1.u
+u1 = hcat(solution1.u...)
+x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+dataset2_2 = [x1, y1, time1]
+
+alg1 = NeuralPDE.BNNODE(chainlux,
+    dataset = dataset2_2,
+    draw_samples = 1000,
+    l2std = [
+        0.01,
+        0.01,
+    ],
+    phystd = [
+        0.01,
+        0.01,
+    ],
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol1flux11_normal = solve(prob1, alg1)
+sol1flux11_normal.estimated_ode_params[1] #|
+sol1flux22_normal = solve(prob1, alg1)
+sol1flux22_normal.estimated_ode_params[1] #|
+sol1flux33_normal = solve(prob1, alg1)
+sol1flux33_normal.estimated_ode_params[1] #|
+
+# 100 points
+solution = solve(prob1, Tsit5(); saveat = 0.05)
+time1 = solution1.t
+physsol1_1 = solution1.u
+u1 = hcat(solution1.u...)
+x1 = u1[1, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+y1 = u1[2, :] + 0.4 .* u1[1, :] .* randn(length(u1[1, :]))
+dataset2_3 = [x1, y1, time1]
+
+alg1 = NeuralPDE.BNNODE(chainlux,
+    dataset = dataset2_3,
+    draw_samples = 1000,
+    l2std = [
+        0.01,
+        0.01,
+    ],
+    phystd = [
+        0.01,
+        0.01,
+    ],
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol1flux111_normal = solve(prob1, alg1)
+sol1flux111_normal.estimated_ode_params[1] #|
+sol1flux222_normal = solve(prob1, alg1)
+sol1flux222_normal.estimated_ode_params[1] #|
+sol1flux333_normal = solve(prob1, alg1)
+sol1flux333_normal.estimated_ode_params[1] #|
+
+# --------------------------------------------------------------------
+
+# ----------------------------------------------------------
+# Full likelihood no l2 only new L22(NN gradients)
+# 25 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux1_new = solve(prob, alg)
+sol2flux1_new.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} |
+sol2flux2_new = solve(prob, alg)
+sol2flux2_new.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1}   |
+sol2flux3_new = solve(prob, alg)
+sol2flux3_new.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1}  |
+
+# 50 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux11_new = solve(prob, alg)
+sol2flux11_new.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1}   |
+sol2flux22_new = solve(prob, alg)
+sol2flux22_new.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1}   |
+sol2flux33_new = solve(prob, alg)
+sol2flux33_new.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} |
+
+# 100 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux111_new = solve(prob, alg)
+sol2flux111_new.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1}   |
+sol2flux222_new = solve(prob, alg)
+sol2flux222_new.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1}     |
+sol2flux333_new = solve(prob, alg)
+sol2flux333_new.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1}   |
+# ---------------------------------------------------------------------------
+
+# ----------------------------------------------------------
+# Full likelihood  l2 + new L22(NN gradients)
+# 25 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux1_new_all = solve(prob, alg)
+sol2flux1_new_all.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1}  |
+sol2flux2_new_all = solve(prob, alg)
+sol2flux2_new_all.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1}    |
+sol2flux3_new_all = solve(prob, alg)
+sol2flux3_new_all.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1}   |
+
+# 50 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux11_new_all = solve(prob, alg)
+sol2flux11_new_all.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1}   |
+sol2flux22_new_all = solve(prob, alg)
+sol2flux22_new_all.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1}   |
+sol2flux33_new_all = solve(prob, alg)
+sol2flux33_new_all.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1} |
+# 100 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol2flux111_new_all = solve(prob, alg)
+sol2flux111_new_all.estimated_ode_params[1] #6.94385 Particles{Float64, 1},5.87832 Particles{Float64, 1}  |
+sol2flux222_new_all = solve(prob, alg)
+sol2flux222_new_all.estimated_ode_params[1] #5.888 Particles{Float64, 1},5.86901 Particles{Float64, 1}    |
+sol2flux333_new_all = solve(prob, alg)
+sol2flux333_new_all.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1}  |
+
+# ---------------------------------------------------------------------------
+
+# ----------------------------------------------------------
+# Full likelihood  l2 + new L22(dataset gradients)
+# 25 points 
+# *1,*2 vs *2.5
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset1,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol1flux1_newdata_all = solve(prob, alg)
+sol1flux1_newdata_all.estimated_ode_params[1] #5.35705 Particles{Float64, 1},5.91809 Particles{Float64, 1} |
+sol1flux2_newdata_all = solve(prob, alg)
+sol1flux2_newdata_all.estimated_ode_params[1] #6.73629 Particles{Float64, 1},5.966 Particles{Float64, 1}   |
+sol1flux3_newdata_all = solve(prob, alg)
+sol1flux3_newdata_all.estimated_ode_params[1] #4.64324 Particles{Float64, 1},5.9559 Particles{Float64, 1}  |
+
+# 50 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset2,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol1flux11_newdata_all = solve(prob, alg)
+sol1flux11_newdata_all.estimated_ode_params[1] #6.43659 Particles{Float64, 1},6.03723 Particles{Float64, 1}    |
+sol1flux22_newdata_all = solve(prob, alg)
+sol1flux22_newdata_all.estimated_ode_params[1] # 6.4389 Particles{Float64, 1},6.01308 Particles{Float64, 1}    |
+sol1flux33_newdata_all = solve(prob, alg)
+sol1flux33_newdata_all.estimated_ode_params[1] # 7.10082 Particles{Float64, 1}, 6.03989 Particles{Float64, 1}   |
+
+# 100 points 
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset3,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+sol1flux111_newdata_all = solve(prob, alg)
+sol1flux111_newdata_all.estimated_ode_params[1]  #|
+sol1flux222_newdata_all = solve(prob, alg)
+sol1flux222_newdata_all.estimated_ode_params[1]  #|
+sol1flux333_newdata_all = solve(prob, alg)
+sol1flux333_newdata_all.estimated_ode_params[1] #6.96835 Particles{Float64, 1},5.86708 Particles{Float64, 1}   |
+
+# ------------------------------------------------------------------------------------------------------------------------------
+
+# sol2flux111.estimated_ode_params[1]
+# # mine *5
+# 7.03386Particles{Float64, 1}
+# # normal
+# 6.38951Particles{Float64, 1}
+# 6.67657Particles{Float64, 1}
+# # mine *10
+# 7.53672Particles{Float64, 1}
+# # mine *2
+# 6.29005Particles{Float64, 1}
+# 6.29844Particles{Float64, 1}
+
+# # new mine *2
+# 6.39008Particles{Float64, 1}
+# 6.22071Particles{Float64, 1}
+# 6.15611Particles{Float64, 1}
+
+# # new mine *2 tvdiff(smoothu, 20, 0.035, dx = dx, ε = 1e-2)
+# 6.25549Particles{Float64, 1}
+# ----------------------------------------------------------
+
+# ---------------------------------------------------
+
+function calculate_derivatives1(dataset)
+    x̂, time = dataset
+    num_points = length(x̂)
+    # Initialize an array to store the derivative values.
+    derivatives = similar(x̂)
+
+    for i in 2:(num_points - 1)
+        # Calculate the first-order derivative using central differences.
+        Δt_forward = time[i + 1] - time[i]
+        Δt_backward = time[i] - time[i - 1]
+
+        derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
+
+        derivatives[i] = derivative
+    end
+
+    # Derivatives at the endpoints can be calculated using forward or backward differences.
+    derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
+    derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
+    return derivatives
+end
+
+function calculate_derivatives2(dataset)
+    u = dataset[1]
+    t = dataset[2]
+    # control points
+    n = Int(floor(length(t) / 10))
+    # spline for datasetvalues(solution) 
+    # interp = BSplineApprox(u, t, 4, 10, :Uniform, :Uniform)
+    interp = CubicSpline(u, t)
+    # derrivatives interpolation
+    dx = t[2] - t[1]
+    time = collect(t[1]:dx:t[end])
+    smoothu = [interp(i) for i in time]
+    # derivative of the spline (must match function derivative) 
+    û = tvdiff(smoothu, 20, 0.03, dx = dx, ε = 1)
+    # tvdiff(smoothu, 100, 0.1, dx = dx)
+    # 
+    # 
+    # FDM
+    û1 = diff(u) / dx
+    # dataset[1] and smoothu are almost equal(rounding errors)
+    return û, time, smoothu, û1
+end
+
+# need to do this for all datasets
+c = [linear(prob.u0, p, t) for t in dataset3[2]] #ideal case
+b = calculate_derivatives1(dataset2) #central diffs
+# a = calculate_derivatives2(dataset) #tvdiff(smoothu, 100, 0.1, dx = dx)
+d = calculate_derivatives2(dataset1) #tvdiff(smoothu, 20, 0.035, dx = dx, ε = 1e-2)
+d = calculate_derivatives2(dataset2)
+d = calculate_derivatives2(dataset3)
+mean(abs2.(c .- b))
+mean(abs2.(c .- d[1]))
+loss(model, x, y) = mean(abs2.(model(x) .- y));
+scatter!(prob.u0 .+ (prob.tspan[2] .- dataset3[2]) .* chainflux1(dataset3[2]')')
+loss(chainflux1, dataset3[2]', dataset3[1]')
+# mean(abs2.(c[1:24] .- a[4]))
+plot(c, label = "ideal deriv")
+plot!(b, label = "Centraldiff deriv")
+# plot!(a[1], label = "tvdiff(0.1,def) derivatives") 
+plot!(d[1], label = "tvdiff(0.035,20) derivatives")
+plotly()
+
+# GridTraining , NoiseRobustDiff dataset[2][2]-dataset[2][1] l2std
+# 25 points 
+ta = range(tspan[1], tspan[2], length = 25)
+u = [linear_analytic(u0, p, ti) for ti in ta]
+x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+time = vec(collect(Float64, ta))
+dataset = [x̂, time]
+physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+time1 = collect(tspan[1]:(1 / 50.0):tspan[2])
+physsol = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
+plot(physsol, label = "solution")
+
+# plots from 32(deriv)
+# for d
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+    draw_samples = 2000, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+n2_sol2flux1 = solve(prob, alg)
+n2_sol2flux1.estimated_ode_params[1]
+# with extra likelihood 
+# 10.2011Particles{Float64, 1}
+
+# without extra likelihood 
+# 6.25791Particles{Float64, 1}
+# 6.29539Particles{Float64, 1}
+
+plot!(n2_sol2flux1.ensemblesol[1], label = "tvdiff(0.035,1) derivpar")
+plot(dataset[1])
+plot!(physsol1)
+# for a
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+    draw_samples = 2000, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+n2_sol2flux2 = solve(prob, alg)
+n2_sol2flux2.estimated_ode_params[1]
+# with extra likelihood
+# 8.73602Particles{Float64, 1}
+# without extra likelihood
+
+plot!(n2_sol2flux2.ensemblesol[1],
+    label = "tvdiff(0.1,def) derivatives",
+    legend = :outerbottomleft)
+
+# for b
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+    draw_samples = 2000, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+n2_sol2flux3 = solve(prob, alg)
+n2_sol2flux3.estimated_ode_params[1]
+plot!(n2_sol2flux3.ensemblesol[1], label = "Centraldiff deriv")
+
+# for c
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+    draw_samples = 2000, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+n2_sol2flux4 = solve(prob, alg)
+n2_sol2flux4.estimated_ode_params[1]
+plot!(n2_sol2flux4.ensemblesol[1], label = "ideal deriv")
+
+# 50 points 
+
+ta = range(tspan[1], tspan[2], length = 50)
+u = [linear_analytic(u0, p, ti) for ti in ta]
+x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+time = vec(collect(Float64, ta))
+dataset = [x̂, time]
+physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+n2_sol2flux11 = solve(prob, alg)
+n2_sol2flux11.estimated_ode_params[1]
+
+# 5.90049Particles{Float64, 1}
+# 100 points
+ta = range(tspan[1], tspan[2], length = 100)
+u = [linear_analytic(u0, p, ti) for ti in ta]
+x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+time = vec(collect(Float64, ta))
+dataset = [x̂, time]
+physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+    draw_samples = 1500, physdt = 1 / 50.0f0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)],
+    Metric = DiagEuclideanMetric,
+    n_leapfrog = 30, progress = true)
+
+n2_sol2flux111 = solve(prob, alg)
+n2_sol2flux111.estimated_ode_params[1]
+plot!(n2_sol2flux111.ensemblesol[1])
+8.88555Particles{Float64, 1}
+
+# 7.15353Particles{Float64, 1}
+# 6.21059 Particles{Float64, 1}
+# 6.31836Particles{Float64, 1}
+0.1 * p
+# ----------------------------------------------------------
+
+# Gives the linear interpolation value at t=3.5
+
+# # Problem 1 with param esimation
+# # dataset 0-1 2 percent noise
+# p = 6.283185307179586
+# # partial_logdensity
+# 6.3549Particles{Float64, 1}
+# # full log_density
+# 6.34667Particles{Float64, 1}
+
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2lux.estimated_ode_params[1]
+
+# # dataset 0-1 20 percent noise
+# # partial log_density
+# 6.30244Particles{Float64, 1}
+# # full log_density
+# 6.24637Particles{Float64, 1}
+
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # dataset 0-2 20percent noise
+# # partial log_density
+# 6.24948Particles{Float64, 1}
+# # full log_density
+# 6.26095Particles{Float64, 1}
+
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# linear_analytic = (u0, p, t) -> u0 + sin(p * t) / (p)
+# linear = (u, p, t) -> cos(p * t)
+# tspan = (0.0, 2.0)
+
+# # dataset 0-1 2 percent noise
+# p = 6.283185307179586
+# # partial_logdensity
+# 6.3549Particles{Float64, 1}
+# # full log_density
+# 6.34667Particles{Float64, 1}
+
+# # dataset 0-1 20 percent noise
+# # partial log_density
+# 6.30244Particles{Float64, 1}
+# # full log_density
+# 6.24637Particles{Float64, 1}
+
+# # dataset 0-2 20percent noise
+# # partial log_density
+# 6.24948Particles{Float64, 1}
+# # full log_density
+# 6.26095Particles{Float64, 1}
+
+# # dataset 0-2 20percent noise 50 points(above all are 100 points)
+# # FuLL log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # partial log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# # i kinda win on 25 points again
+# # dataset 0-2 20percent noise 25 points
+# # FuLL log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # partial log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # i win with 25 points
+# # dataset 0-1 20percent noise 25 points
+# # FuLL log_density
+# sol2flux.estimated_ode_params[1]
+# # new
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # partial log_density
+# sol2flux.estimated_ode_params[1]
+# # New
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # (9,2.5)(above are (9,0.5))
+# # FuLL log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# # just prev was repeat(just change)
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # partial log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # i lose on 0-1,50 points
+# # dataset 0-1 20percent noise 50 points
+# # FuLL log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+# # partial log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # (9,2.5) (above are (9,0.5))
+# # FuLL log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # partial log_density
+# sol2flux.estimated_ode_params[1]
+# sol2flux.estimated_ode_params[1]
+
+# # ----------------------------------------------------------
+# # Problem 1 with param estimation
+# # physdt=1/20, Full likelihood new 0.5*l2std
+# # 25 points
+# ta = range(tspan[1], tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n05_sol2flux1 = solve(prob, alg)
+# n05_sol2flux1.estimated_ode_params[1] #6.90953 Particles{Float64, 1}
+# n05_sol2flux2 = solve(prob, alg)
+# n05_sol2flux2.estimated_ode_params[1] #6.82374 Particles{Float64, 1}
+# n05_sol2flux3 = solve(prob, alg)
+# n05_sol2flux3.estimated_ode_params[1] #6.84465 Particles{Float64, 1}
+
+# using Plots, StatsPlots
+# plot(n05_sol2flux3.ensemblesol[1])
+# plot!(physsol1)
+# # 50 points
+# ta = range(tspan[1], tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n05_sol2flux11 = solve(prob, alg)
+# n05_sol2flux11.estimated_ode_params[1] #7.0262 Particles{Float64, 1}
+# n05_sol2flux22 = solve(prob, alg)
+# n05_sol2flux22.estimated_ode_params[1] #5.56438 Particles{Float64, 1}
+# n05_sol2flux33 = solve(prob, alg)
+# n05_sol2flux33.estimated_ode_params[1] #7.27189 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n05_sol2flux111 = solve(prob, alg)
+# n05_sol2flux111.estimated_ode_params[1] #6.90549 Particles{Float64, 1}
+# n05_sol2flux222 = solve(prob, alg)
+# n05_sol2flux222.estimated_ode_params[1] #5.42436 Particles{Float64, 1}
+# n05_sol2flux333 = solve(prob, alg)
+# n05_sol2flux333.estimated_ode_params[1] #6.05832 Particles{Float64, 1}
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood new 2*l2std
+# # 25 points
+# ta = range(tspan[1], tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n2_sol2flux1 = solve(prob, alg)
+# n2_sol2flux1.estimated_ode_params[1]#6.9087 Particles{Float64, 1}
+# n2_sol2flux2 = solve(prob, alg)
+# n2_sol2flux2.estimated_ode_params[1]#6.86507 Particles{Float64, 1}
+# n2_sol2flux3 = solve(prob, alg)
+# n2_sol2flux3.estimated_ode_params[1]#6.59206 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(tspan[1], tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n2_sol2flux11 = solve(prob, alg)
+# n2_sol2flux11.estimated_ode_params[1]#7.3715 Particles{Float64, 1}
+# n2_sol2flux22 = solve(prob, alg)
+# n2_sol2flux22.estimated_ode_params[1]#9.84477 Particles{Float64, 1}
+# n2_sol2flux33 = solve(prob, alg)
+# n2_sol2flux33.estimated_ode_params[1]#6.87107 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n2_sol2flux111 = solve(prob, alg)
+# n2_sol2flux111.estimated_ode_params[1]#6.60739 Particles{Float64, 1}
+# n2_sol2flux222 = solve(prob, alg)
+# n2_sol2flux222.estimated_ode_params[1]#7.05923 Particles{Float64, 1}
+# n2_sol2flux333 = solve(prob, alg)
+# n2_sol2flux333.estimated_ode_params[1]#6.5017 Particles{Float64, 1}
+
+# # ----------------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood new all 2*l2std
+# # 25 points
+# ta = range(tspan[1], tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n2all5sol2flux1 = solve(prob, alg)
+# n2all5sol2flux1.estimated_ode_params[1]#11.3659 Particles{Float64, 1}
+# n2all5sol2flux2 = solve(prob, alg)
+# n2all5sol2flux2.estimated_ode_params[1]#6.65634 Particles{Float64, 1}
+# n2all5sol2flux3 = solve(prob, alg)
+# n2all5sol2flux3.estimated_ode_params[1]#6.61905 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(tspan[1], tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n2all5sol2flux11 = solve(prob, alg)
+# n2all5sol2flux11.estimated_ode_params[1]#6.27555 Particles{Float64, 1}
+# n2all5sol2flux22 = solve(prob, alg)
+# n2all5sol2flux22.estimated_ode_params[1]#6.24352 Particles{Float64, 1}
+# n2all5sol2flux33 = solve(prob, alg)
+# n2all5sol2flux33.estimated_ode_params[1]#6.33723 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n2all5sol2flux111 = solve(prob, alg)
+# n2all5sol2flux111.estimated_ode_params[1] #5.95535 Particles{Float64, 1}
+# n2all5sol2flux222 = solve(prob, alg)
+# n2all5sol2flux222.estimated_ode_params[1] #5.98301 Particles{Float64, 1}
+# n2all5sol2flux333 = solve(prob, alg)
+# n2all5sol2flux333.estimated_ode_params[1] #5.9081 Particles{Float64, 1}
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood new all (l2+l22)
+# # 25 points
+# ta = range(tspan[1], tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# nall5sol2flux1 = solve(prob, alg)
+# nall5sol2flux1.estimated_ode_params[1]#6.54705 Particles{Float64, 1}
+# nall5sol2flux2 = solve(prob, alg)
+# nall5sol2flux2.estimated_ode_params[1]#6.6967 Particles{Float64, 1}
+# nall5sol2flux3 = solve(prob, alg)
+# nall5sol2flux3.estimated_ode_params[1]#6.47173 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(tspan[1], tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# nall5sol2flux11 = solve(prob, alg)
+# nall5sol2flux11.estimated_ode_params[1]#6.2113 Particles{Float64, 1}
+# nall5sol2flux22 = solve(prob, alg)
+# nall5sol2flux22.estimated_ode_params[1]#6.10675 Particles{Float64, 1}
+# nall5sol2flux33 = solve(prob, alg)
+# nall5sol2flux33.estimated_ode_params[1]#6.11541 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# nall5sol2flux111 = solve(prob, alg)
+# nall5sol2flux111.estimated_ode_params[1]#6.35224 Particles{Float64, 1}
+# nall5sol2flux222 = solve(prob, alg)
+# nall5sol2flux222.estimated_ode_params[1]#6.40542 Particles{Float64, 1}
+# nall5sol2flux333 = solve(prob, alg)
+# nall5sol2flux333.estimated_ode_params[1]#6.44206 Particles{Float64, 1}
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood new 5* (new only l22 mod)
+# # 25 points
+# ta = range(tspan[1], tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n5sol2flux1 = solve(prob, alg)
+# n5sol2flux1.estimated_ode_params[1]#7.05077 Particles{Float64, 1}
+# n5sol2flux2 = solve(prob, alg)
+# n5sol2flux2.estimated_ode_params[1]#7.07303 Particles{Float64, 1}
+# n5sol2flux3 = solve(prob, alg)
+# n5sol2flux3.estimated_ode_params[1]#5.10622 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(tspan[1], tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n5sol2flux11 = solve(prob, alg)
+# n5sol2flux11.estimated_ode_params[1]#7.39852 Particles{Float64, 1}
+# n5sol2flux22 = solve(prob, alg)
+# n5sol2flux22.estimated_ode_params[1]#7.30319 Particles{Float64, 1}
+# n5sol2flux33 = solve(prob, alg)
+# n5sol2flux33.estimated_ode_params[1]#6.73722 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# n5sol2flux111 = solve(prob, alg)
+# n5sol2flux111.estimated_ode_params[1]#7.15996 Particles{Float64, 1}
+# n5sol2flux222 = solve(prob, alg)
+# n5sol2flux222.estimated_ode_params[1]#7.02949 Particles{Float64, 1}
+# n5sol2flux333 = solve(prob, alg)
+# n5sol2flux333.estimated_ode_params[1]#6.9393 Particles{Float64, 1}
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood new
+# # 25 points
+# ta = range(tspan[1], tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# nsol2flux1 = solve(prob, alg)
+# nsol2flux1.estimated_ode_params[1] #5.82707 Particles{Float64, 1}
+# nsol2flux2 = solve(prob, alg)
+# nsol2flux2.estimated_ode_params[1] #4.81534 Particles{Float64, 1}
+# nsol2flux3 = solve(prob, alg)
+# nsol2flux3.estimated_ode_params[1] #5.52965 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(tspan[1], tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# nsol2flux11 = solve(prob, alg)
+# nsol2flux11.estimated_ode_params[1] #7.04027 Particles{Float64, 1}
+# nsol2flux22 = solve(prob, alg)
+# nsol2flux22.estimated_ode_params[1] #7.17588 Particles{Float64, 1}
+# nsol2flux33 = solve(prob, alg)
+# nsol2flux33.estimated_ode_params[1] #6.94495 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# nsol2flux111 = solve(prob, alg)
+# nsol2flux111.estimated_ode_params[1] #6.06608 Particles{Float64, 1}
+# nsol2flux222 = solve(prob, alg)
+# nsol2flux222.estimated_ode_params[1] #6.84726 Particles{Float64, 1}
+# nsol2flux333 = solve(prob, alg)
+# nsol2flux333.estimated_ode_params[1] #6.83463 Particles{Float64, 1}
+
+# # ----------------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood
+# # 25 points
+# ta = range(tspan[1], tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux1 = solve(prob, alg)
+# sol2flux1.estimated_ode_params[1] #6.71397 Particles{Float64, 1} 6.37604 Particles{Float64, 1}
+# sol2flux2 = solve(prob, alg)
+# sol2flux2.estimated_ode_params[1] #6.73509 Particles{Float64, 1} 6.21692 Particles{Float64, 1}
+# sol2flux3 = solve(prob, alg)
+# sol2flux3.estimated_ode_params[1] #6.65453 Particles{Float64, 1} 6.23153 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(tspan[1], tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux11 = solve(prob, alg)
+# sol2flux11.estimated_ode_params[1] #6.23443 Particles{Float64, 1} 6.30635 Particles{Float64, 1}
+# sol2flux22 = solve(prob, alg)
+# sol2flux22.estimated_ode_params[1] #6.18879 Particles{Float64, 1} 6.30099 Particles{Float64, 1}
+# sol2flux33 = solve(prob, alg)
+# sol2flux33.estimated_ode_params[1] #6.22773 Particles{Float64, 1} 6.30671 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux111 = solve(prob, alg)
+# sol2flux111.estimated_ode_params[1] #6.15832 Particles{Float64, 1} 6.35453 Particles{Float64, 1}
+# sol2flux222 = solve(prob, alg)
+# sol2flux222.estimated_ode_params[1] #6.16968 Particles{Float64, 1}6.31125 Particles{Float64, 1}
+# sol2flux333 = solve(prob, alg)
+# sol2flux333.estimated_ode_params[1] #6.12466 Particles{Float64, 1} 6.26514 Particles{Float64, 1}
+
+# # ----------------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, partial likelihood
+# # 25 points
+# ta = range(tspan[1], tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux1_p = solve(prob, alg)
+# sol2flux1_p.estimated_ode_params[1] #5.74065 Particles{Float64, 1} #6.83683 Particles{Float64, 1}
+# sol2flux2_p = solve(prob, alg)
+# sol2flux2_p.estimated_ode_params[1] #9.82504 Particles{Float64, 1} #6.14568 Particles{Float64, 1}
+# sol2flux3_p = solve(prob, alg)
+# sol2flux3_p.estimated_ode_params[1] #5.75075 Particles{Float64, 1} #6.08579 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(tspan[1], tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux11_p = solve(prob, alg)
+# sol2flux11_p.estimated_ode_params[1] #6.19414 Particles{Float64, 1} #6.04621 Particles{Float64, 1}
+# sol2flux22_p = solve(prob, alg)
+# sol2flux22_p.estimated_ode_params[1] #6.15227 Particles{Float64, 1} #6.29086 Particles{Float64, 1}
+# sol2flux33_p = solve(prob, alg)
+# sol2flux33_p.estimated_ode_params[1] #6.19048 Particles{Float64, 1} #6.12516 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol2flux111_p = solve(prob, alg)
+# sol2flux111_p.estimated_ode_params[1] #6.51608 Particles{Float64, 1}# 6.42945Particles{Float64, 1}
+# sol2flux222_p = solve(prob, alg)
+# sol2flux222_p.estimated_ode_params[1] #6.4875 Particles{Float64, 1} # 6.44524Particles{Float64, 1}
+# sol2flux333_p = solve(prob, alg)
+# sol2flux333_p.estimated_ode_params[1] #6.51679 Particles{Float64, 1}# 6.43152Particles{Float64, 1}
+
+# # ---------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood, dataset(1.0-2.0)
+# # 25 points
+# ta = range(1.0, tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1flux1 = solve(prob, alg)
+# sol1flux1.estimated_ode_params[1] #6.35164 Particles{Float64, 1}
+# sol1flux2 = solve(prob, alg)
+# sol1flux2.estimated_ode_params[1] #6.30919 Particles{Float64, 1}
+# sol1flux3 = solve(prob, alg)
+# sol1flux3.estimated_ode_params[1] #6.33554 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(1.0, tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1flux11 = solve(prob, alg)
+# sol1flux11.estimated_ode_params[1] #6.39769 Particles{Float64, 1}
+# sol1flux22 = solve(prob, alg)
+# sol1flux22.estimated_ode_params[1] #6.43924 Particles{Float64, 1}
+# sol1flux33 = solve(prob, alg)
+# sol1flux33.estimated_ode_params[1] #6.4697 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(1.0, tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1flux111 = solve(prob, alg)
+# sol1flux111.estimated_ode_params[1] #6.27812 Particles{Float64, 1}
+# sol1flux222 = solve(prob, alg)
+# sol1flux222.estimated_ode_params[1] #6.19278 Particles{Float64, 1}
+# sol1flux333 = solve(prob, alg)
+# sol1flux333.estimated_ode_params[1] # 9.68244Particles{Float64, 1} (first try) # 6.23969 Particles{Float64, 1}(second try)
+
+# # ----------------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, partial likelihood, dataset(1.0-2.0)
+# # 25 points
+# ta = range(1.0, tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1flux1_p = solve(prob, alg)
+# sol1flux1_p.estimated_ode_params[1]#6.36269 Particles{Float64, 1}
+
+# sol1flux2_p = solve(prob, alg)
+# sol1flux2_p.estimated_ode_params[1]#6.34685 Particles{Float64, 1}
+
+# sol1flux3_p = solve(prob, alg)
+# sol1flux3_p.estimated_ode_params[1]#6.31421 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(1.0, tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1flux11_p = solve(prob, alg)
+# sol1flux11_p.estimated_ode_params[1] #6.15725 Particles{Float64, 1}
+
+# sol1flux22_p = solve(prob, alg)
+# sol1flux22_p.estimated_ode_params[1] #6.18145 Particles{Float64, 1}
+
+# sol1flux33_p = solve(prob, alg)
+# sol1flux33_p.estimated_ode_params[1] #6.21905 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(1.0, tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1flux111_p = solve(prob, alg)
+# sol1flux111_p.estimated_ode_params[1]#6.13481 Particles{Float64, 1}
+
+# sol1flux222_p = solve(prob, alg)
+# sol1flux222_p.estimated_ode_params[1]#9.68555 Particles{Float64, 1}
+
+# sol1flux333_p = solve(prob, alg)
+# sol1flux333_p.estimated_ode_params[1]#6.1477 Particles{Float64, 1}
+
+# # -----------------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, partial likelihood, dataset(1-2), again but different density
+# # 12 points
+# ta = range(1.0, tspan[2], length = 12)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol3flux1_p = solve(prob, alg)
+# sol3flux1_p.estimated_ode_params[1]#6.50048 Particles{Float64, 1}
+# sol3flux2_p = solve(prob, alg)
+# sol3flux2_p.estimated_ode_params[1]#6.57597 Particles{Float64, 1}
+# sol3flux3_p = solve(prob, alg)
+# sol3flux3_p.estimated_ode_params[1]#6.24487 Particles{Float64, 1}
+
+# # 25 points
+# ta = range(1.0, tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol3flux11_p = solve(prob, alg)
+# sol3flux11_p.estimated_ode_params[1]#6.53093 Particles{Float64, 1}
+
+# sol3flux22_p = solve(prob, alg)
+# sol3flux22_p.estimated_ode_params[1]#6.32744 Particles{Float64, 1}
+
+# sol3flux33_p = solve(prob, alg)
+# sol3flux33_p.estimated_ode_params[1]#6.49175 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(1.0, tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol3flux111_p = solve(prob, alg)
+# sol3flux111_p.estimated_ode_params[1]#6.4455 Particles{Float64, 1}
+# sol3flux222_p = solve(prob, alg)
+# sol3flux222_p.estimated_ode_params[1]#6.40736 Particles{Float64, 1}
+# sol3flux333_p = solve(prob, alg)
+# sol3flux333_p.estimated_ode_params[1]#6.46214 Particles{Float64, 1}
+
+# # ---------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, partial likelihood, dataset(0-1)
+# # 25 points
+# ta = range(tspan[1], 1.0, length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol0flux1_p = solve(prob, alg)
+# sol0flux1_p.estimated_ode_params[1]#7.12625 Particles{Float64, 1}
+# sol0flux2_p = solve(prob, alg)
+# sol0flux2_p.estimated_ode_params[1]#8.40948 Particles{Float64, 1}
+# sol0flux3_p = solve(prob, alg)
+# sol0flux3_p.estimated_ode_params[1]#7.18768 Particles{Float64, 1}
+
+# # 50 points
+# ta = range(tspan[1], 1.0, length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol0flux11_p = solve(prob, alg)
+# sol0flux11_p.estimated_ode_params[1]#6.23707 Particles{Float64, 1}
+# sol0flux22_p = solve(prob, alg)
+# sol0flux22_p.estimated_ode_params[1]#6.09728 Particles{Float64, 1}
+# sol0flux33_p = solve(prob, alg)
+# sol0flux33_p.estimated_ode_params[1]#6.12971 Particles{Float64, 1}
+
+# # 100 points
+# ta = range(tspan[1], 1.0, length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [LogNormal(9, 0.5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol0flux111_p = solve(prob, alg)
+# sol0flux111_p.estimated_ode_params[1]#5.99039 Particles{Float64, 1}
+# sol0flux222_p = solve(prob, alg)
+# sol0flux222_p.estimated_ode_params[1]#5.89609 Particles{Float64, 1}
+# sol0flux333_p = solve(prob, alg)
+# sol0flux333_p.estimated_ode_params[1]#5.91923 Particles{Float64, 1}
+
+# # ---------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, Full likelihood, dataset(1.0-2.0), Normal(12,5) distri prior
+# # 25 points
+# ta = range(1.0, tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 6.0),
+#     param = [Normal(12, 5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1f1 = solve(prob, alg)
+# sol1f1.estimated_ode_params[1]
+# # 10.9818Particles{Float64, 1}
+# sol1f2 = solve(prob, alg)
+# sol1f2.estimated_ode_params[1]
+# # sol1f3 = solve(prob, alg)
+# # sol1f3.estimated_ode_params[1]
+
+# # 50 points
+# ta = range(1.0, tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 6.0),
+#     param = [Normal(12, 5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1f11 = solve(prob, alg)
+# sol1f11.estimated_ode_params[1]
+# sol1f22 = solve(prob, alg)
+# sol1f22.estimated_ode_params[1]
+# # sol1f33 = solve(prob, alg)
+# # sol1f33.estimated_ode_params[1]
+
+# # 100 points
+# ta = range(1.0, tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 6.0),
+#     param = [Normal(12, 5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1f111 = solve(prob, alg)
+# sol1f111.estimated_ode_params[1]
+# sol1f222 = solve(prob, alg)
+# sol1f222.estimated_ode_params[1]
+# # sol1f333 = solve(prob, alg)
+# # sol1f333.estimated_ode_params[1]
+
+# # ----------------------------------------------------------
+
+# # ----------------------------------------------------------
+# # physdt=1/20, partial likelihood, dataset(1.0-2.0), Normal(12,5) distri prior
+# # 25 points
+# ta = range(1.0, tspan[2], length = 25)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [Normal(12, 5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1f1_p = solve(prob, alg)
+# sol1f1_p.estimated_ode_params[1]
+# sol1f2_p = solve(prob, alg)
+# sol1f2_p.estimated_ode_params[1]
+# sol1f3_p = solve(prob, alg)
+# sol1f3_p.estimated_ode_params[1]
+
+# # 50 points
+# ta = range(1.0, tspan[2], length = 50)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [Normal(12, 5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1f11_p = solve(prob, alg)
+# sol1f11_p.estimated_ode_params[1]
+# sol1f22_p = solve(prob, alg)
+# sol1f22_p.estimated_ode_params[1]
+# sol1f33_p = solve(prob, alg)
+# sol1f33_p.estimated_ode_params[1]
+
+# # 100 points
+# ta = range(1.0, tspan[2], length = 100)
+# u = [linear_analytic(u0, p, ti) for ti in ta]
+# x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂, time]
+# physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+#     draw_samples = 1500, physdt = 1 / 50.0f0,
+#     priorsNNw = (0.0, 3.0),
+#     param = [Normal(12, 5)],
+#     Metric = DiagEuclideanMetric,
+#     n_leapfrog = 30, progress = true)
+
+# sol1f111_p = solve(prob, alg)
+# sol1f111_p.estimated_ode_params[1]
+# sol1f222_p = solve(prob, alg)
+# sol1f222_p.estimated_ode_params[1]
+# sol1f333_p = solve(prob, alg)
+# sol1f333_p.estimated_ode_params[1]
+
+# # ----------------------------------------------------------
+
+# plot!(title = "9,2.5 50 training 2>full,1>partial")
+
+# p
+# param1
+# # (lux chain)
+# @prob mean(abs.(physsol2 .- sol3lux_pestim.ensemblesol[1])) < 8e-2
+
+# # estimated parameters(lux chain)
+# param1 = sol3lux_pestim.estimated_ode_params[1]
+# @test abs(param1 - p) < abs(0.35 * p)
+
+# p
+# param1
+
+# # # my suggested Loss likelihood part
+# # #  + L2loss2(Tar, θ)
+# # # My suggested extra loss function
+# # function L2loss2(Tar::LogTargetDensity, θ)
+# #     f = Tar.prob.f
+
+# #     # parameter estimation chosen or not
+# #     if Tar.extraparams > 0
+# #         dataset = Tar.dataset
+
+# #         # Timepoints to enforce Physics
+# #         dataset = Array(reduce(hcat, dataset)')
+# #         t = dataset[end, :]
+# #         û = dataset[1:(end - 1), :]
+
+# #         ode_params = Tar.extraparams == 1 ?
+# #                      θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
+# #                      θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
+
+# #         if length(û[:, 1]) == 1
+# #             physsol = [f(û[:, i][1],
+# #                 ode_params,
+# #                 t[i])
+# #                        for i in 1:length(û[1, :])]
+# #         else
+# #             physsol = [f(û[:, i],
+# #                 ode_params,
+# #                 t[i])
+# #                        for i in 1:length(û[1, :])]
+# #         end
+# #         #form of NN output matrix output dim x n
+# #         deri_physsol = reduce(hcat, physsol)
+
+# #         #   OG deriv(basically gradient matching in case of an ODEFunction)
+# #         # in case of PDE or general ODE we would want to reduce residue of f(du,u,p,t)
+# #         # if length(û[:, 1]) == 1
+# #         #     deri_sol = [f(û[:, i][1],
+# #         #         Tar.prob.p,
+# #         #         t[i])
+# #         #                 for i in 1:length(û[1, :])]
+# #         # else
+# #         #     deri_sol = [f(û[:, i],
+# #         #         Tar.prob.p,
+# #         #         t[i])
+# #         #                 for i in 1:length(û[1, :])]
+# #         # end
+# #         # deri_sol = reduce(hcat, deri_sol)
+# #         derivatives = calculate_derivatives(Tar.dataset)
+# #         deri_sol = reduce(hcat, derivatives)
+
+# #         physlogprob = 0
+# #         for i in 1:length(Tar.prob.u0)
+# #             # can add phystd[i] for u[i]
+# #             physlogprob += logpdf(MvNormal(deri_physsol[i, :],
+# #                     LinearAlgebra.Diagonal(map(abs2,
+# #                         Tar.l2std[i] .*
+# #                         ones(length(deri_sol[i, :]))))),
+# #                 deri_sol[i, :])
+# #         end
+# #         return physlogprob
+# #     else
+# #         return 0
+# #     end
+# # end
+
+# # function calculate_derivatives(dataset)
+# #     x̂, time = dataset
+# #     num_points = length(x̂)
+
+# #     # Initialize an array to store the derivative values.
+# #     derivatives = similar(x̂)
+
+# #     for i in 2:(num_points - 1)
+# #         # Calculate the first-order derivative using central differences.
+# #         Δt_forward = time[i + 1] - time[i]
+# #         Δt_backward = time[i] - time[i - 1]
+
+# #         derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
+
+# #         derivatives[i] = derivative
+# #     end
+
+# #     # Derivatives at the endpoints can be calculated using forward or backward differences.
+# #     derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
+# #     derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
+
+# #     return derivatives
+# # end
+
+# size(dataset[1])
+# # Problem 1 with param estimation(flux,lux)
+# # Normal
+# # 6.20311 Particles{Float64, 1},6.21746Particles{Float64, 1}
+# # better
+# # 6.29093Particles{Float64, 1}, 6.27925Particles{Float64, 1}
+# # Non ideal case
+# # 6.14861Particles{Float64, 1}, 
+# sol2flux.estimated_ode_params
+# sol2lux.estimated_ode_params[1]
+# p
+# size(sol3flux_pestim.ensemblesol[2])
+# plott = sol3flux_pestim.ensemblesol[1]
+# using StatsPlots
+# plotly()
+# plot(t, sol3flux_pestim.ensemblesol[1])
+
+# function calculate_derivatives(dataset)
+#     x̂, time = dataset
+#     num_points = length(x̂)
+
+#     # Initialize an array to store the derivative values.
+#     derivatives = similar(x̂)
+
+#     for i in 2:(num_points - 1)
+#         # Calculate the first-order derivative using central differences.
+#         Δt_forward = time[i + 1] - time[i]
+#         Δt_backward = time[i] - time[i - 1]
+
+#         derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
+
+#         derivatives[i] = derivative
+#     end
+
+#     # Derivatives at the endpoints can be calculated using forward or backward differences.
+#     derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
+#     derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
+
+#     return derivatives
+# end
+
+# # Example usage:
+# # dataset = [x̂, time]
+# derivatives = calculate_derivatives(dataset)
+# dataset[1]
+# # Access derivative values at specific time points as needed.
+
+# # # 9,0.5
+# # 0.09894916260292887
+# # 0.09870335436072103
+# # 0.08398556878067913
+# # 0.10109070099105527
+# # 0.09122683737517055
+# # 0.08614958011892977
+# # mean(abs.(x̂ .- meanscurve1)) #0.017112298305523976
+# # mean(abs.(physsol1 .- meanscurve1)) #0.004038636894341354
+# # # 9,4(little worse)
+# # mean(abs.(x̂ .- meanscurve1))#0.01800876370000113
+# # mean(abs.(physsol1 .- meanscurve1))#0.007285681280600875
+# # # 30,30
+# # mean(abs.(x̂ .- meanscurve1)) #0.10599926120358046
+# # mean(abs.(physsol1 .- meanscurve1)) #0.10375554193397989
+# # # 30,0.5
+# # mean(abs.(x̂ .- meanscurve1)) #0.10160824458252521
+# # mean(abs.(physsol1 .- meanscurve1)) #0.09999942538357891
+
+# # # ------------------------------------------------normale
+# # # 9,0.5
+# # mean(abs.(x̂ .- meanscurve1)) #0.0333356493928835
+# # mean(abs.(physsol1 .- meanscurve1)) #0.02721733876400459
+# # # 9,4(little worse)
+# # mean(abs.(x̂ .- meanscurve1)) #0.020734206709433347
+# # mean(abs.(physsol1 .- meanscurve1)) #0.012502850740700212
+# # # 30,30
+# # mean(abs.(x̂ .- meanscurve1)) #0.10615859683094729
+# # mean(abs.(physsol1 .- meanscurve1)) #0.10508141153722575
+# # # 30,0.5
+# # mean(abs.(x̂ .- meanscurve1)) #0.10833514946031565
+# # mean(abs.(physsol1 .- meanscurve1)) #0.10668470203219232
+
+# # # 9,0.5
+# # 10.158108285475553
+# # 10.207234384538026
+# # 10.215000657664852
+# # 10.213817644016174
+# # 13.380030074088719
+# # 13.348906350967326
+
+# # 6.952731422892041
+
+# # # All losses
+# # 10.161478523326277
+# # # L2 losses 1
+# # 9.33312996960278
+# # # L2 losses 2
+# # 10.217417241370631
+
+# # mean([fhsamples1[i][26] for i in 500:1000]) #6.245045767509431
+# # p #6.283185307179586
+# # # 9,4
+# # mean([fhsamples1[i][23] for i in 500:1000]) #6.212522300650451
+# # # 30,30
+# # mean([fhsamples1[i][23] for i in 500:1000]) #35.328636809737695
+# # # 30,0.5
+# # mean([fhsamples1[i][23] for i in 500:1000]) #35.232963812125654
+
+# # # ---------------------------------------normale
+# # # 9,0.5
+# # mean([fhsamples1[i][23] for i in 500:1000]) #6.547771572198114
+# # p #6.283185307179586
+# # # 9,4
+# # mean([fhsamples1[i][23] for i in 500:1000]) #6.158906185002702
+# # # 30,30
+# # mean([fhsamples1[i][23] for i in 500:1000]) #29.210400972620185
+# # # 30,0.5
+# # mean([fhsamples1[i][23] for i in 500:1000]) #29.153845019454522
+
+# # # ----------------more dataset normale -----------------------------
+# # # 9,0.5
+# # mean([fhsamples1[i][23] for i in 500:1000]) #6.271141178216537
+# # p #6.283185307179586
+# # # 9,4
+# # mean([fhsamples1[i][23] for i in 500:1000]) #6.241144692919369
+# # # 30,30
+# # mean([fhsamples1[i][23] for i in 500:1000]) #29.124480447973127
+# # # 30,0.5
+# # mean([fhsamples1[i][23] for i in 500:1000]) #29.07838011629903
+
+# # # 9,0.5
+# # mean(abs.(x̂ .- meanscurve1)) #0.016551602015599295
+# # mean(abs.(physsol1 .- meanscurve1)) #0.0021488618484224245
+# # # 9,4(little worse)
+# # mean(abs.(x̂ .- meanscurve1)) #0.017022725082640747
+# # mean(abs.(physsol1 .- meanscurve1)) #0.004339761917100232
+# # # 30,30
+# # mean(abs.(x̂ .- meanscurve1)) #0.09668785317864312
+# # mean(abs.(physsol1 .- meanscurve1)) #0.09430712337543362
+# # # 30,0.5
+# # mean(abs.(x̂ .- meanscurve1)) #0.09958118358974392
+# # mean(abs.(physsol1 .- meanscurve1)) #0.09717454226368502
+
+# # # ----------------more dataset special -----------------------------
+# # # 9,0.5
+# # mean([fhsamples1[i][23] for i in 500:1000]) #6.284355334485365
+# # p #6.283185307179586
+# # # 9,4
+# # mean([fhsamples1[i][23] for i in 500:1000]) #6.259238106698602
+# # # 30,30
+# # mean([fhsamples1[i][23] for i in 500:1000]) #29.139808934336987
+# # # 30,0.5
+# # mean([fhsamples1[i][23] for i in 500:1000]) #29.03921327641226
+
+# # # 9,0.5
+# # mean(abs.(x̂ .- meanscurve1)) #0.016627231605546876
+# # mean(abs.(physsol1 .- meanscurve1)) #0.0020311429130039564
+# # # 9,4(little worse)
+# # mean(abs.(x̂ .- meanscurve1)) #0.016650324577507352
+# # mean(abs.(physsol1 .- meanscurve1)) #0.0027537543411154677
+# # # 30,30
+# # mean(abs.(x̂ .- meanscurve1)) #0.09713187937270151
+# # mean(abs.(physsol1 .- meanscurve1)) #0.09317278450371556
+# # # 30,0.5
+# # mean(abs.(x̂ .- meanscurve1)) #0.09550234866855814
+# # mean(abs.(physsol1 .- meanscurve1)) #0.09317278450371556
+
+# # using Plots, StatsPlots
+# # plotly()
+
+# # ---------------------------------------------------------
+# # # # Distribution abstract in wrapper, dataset Float64
+# # # 268.651 s (206393690 allocations: 388.71 GiB)
+# # # 318.170551 seconds (206.29 M allocations: 388.453 GiB, 20.83% gc time)
+
+# # # # Above with dataset Real subtype
+# # # 326.201 s (206327409 allocations: 388.42 GiB)
+# # # 363.189370 seconds (206.25 M allocations: 387.975 GiB, 15.77% gc time)
+# # # 306.171 s (206321277 allocations: 388.55 GiB)
+# # # 356.180699 seconds (206.43 M allocations: 388.361 GiB, 13.77% gc time)
+
+# # # # Above with dataset AbstractFloat subtype
+# # # 290.751187 seconds (205.94 M allocations: 387.955 GiB, 12.92% gc time)
+# # # 296.319815 seconds (206.38 M allocations: 388.730 GiB, 12.69% gc time)
+
+# # # # ODEProblem float64 dtaset and vector distri inside
+# # #   273.169 s (206128318 allocations: 388.40 GiB)
+# # #   274.059531 seconds (205.91 M allocations: 387.953 GiB, 12.77% gc time)
+
+# # # #   Dataset float64 inside and vector distri outsude
+# # #   333.603 s (206251143 allocations: 388.41 GiB)
+# # # 373.377222 seconds (206.11 M allocations: 387.968 GiB, 13.25% gc time)
+# # #   359.745 s (206348301 allocations: 388.41 GiB)
+# # # 357.813114 seconds (206.31 M allocations: 388.354 GiB, 13.54% gc time)
+
+# # # # Dataset float64 inside and vector distri inside
+# # #   326.437 s (206253571 allocations: 388.41 GiB)
+# # #   290.334083 seconds (205.92 M allocations: 387.954 GiB, 13.82% gc time)
+
+# # # # current setting
+# # # 451.304 s (206476927 allocations: 388.43 GiB)
+# # # 384.532732 seconds (206.22 M allocations: 387.976 GiB, 13.17% gc time)
+# # # 310.223 s (206332558 allocations: 388.63 GiB)
+# # # 344.243889 seconds (206.34 M allocations: 388.409 GiB, 13.84% gc time)
+# # # 357.457737 seconds (206.66 M allocations: 389.064 GiB, 18.16% gc time)
+
+# # # # shit setup
+# # #   325.595 s (206283732 allocations: 388.41 GiB)
+# # # 334.248753 seconds (206.06 M allocations: 387.964 GiB, 12.60% gc time)
+# # #   326.011 s (206370857 allocations: 388.56 GiB)
+# # # 327.203339 seconds (206.29 M allocations: 388.405 GiB, 12.92% gc time)
+
+# # # # in wrapper Distribution prior, insiade FLOAT64 DATASET
+# # # 325.158167 seconds (205.97 M allocations: 387.958 GiB, 15.07% gc time) 
+# # #   429.536 s (206476324 allocations: 388.43 GiB)
+# # #   527.364 s (206740343 allocations: 388.58 GiB)
+
+# # # #   wrapper Distribtuion, inside Float64
+# # # 326.017 s (206037971 allocations: 387.96 GiB)
+# # # 347.424730 seconds (206.45 M allocations: 388.532 GiB, 12.92% gc time)
+
+# # # 439.047568 seconds (284.24 M allocations: 392.598 GiB, 15.25% gc time, 14.36% compilation time: 0% of which was recompilation)
+# # # 375.472142 seconds (206.40 M allocations: 388.529 GiB, 14.93% gc time)
+# # # 374.888820 seconds (206.34 M allocations: 388.346 GiB, 14.09% gc time)
+# # # 363.719611 seconds (206.39 M allocations: 388.581 GiB, 15.08% gc time)
+# # # # inside Distribtion, instide Float64
+# # #   310.238 s (206324249 allocations: 388.53 GiB)
+# # #   308.991494 seconds (206.34 M allocations: 388.549 GiB, 14.01% gc time)
+# # #   337.442 s (206280712 allocations: 388.36 GiB)
+# # #   299.983096 seconds (206.29 M allocations: 388.512 GiB, 17.14% gc time)
+
+# # #   394.924357 seconds (206.27 M allocations: 388.337 GiB, 23.68% gc time)
+# # # 438.204179 seconds (206.39 M allocations: 388.470 GiB, 23.84% gc time)
+# # #   376.626914 seconds (206.46 M allocations: 388.693 GiB, 18.72% gc time)
+# # # 286.863795 seconds (206.14 M allocations: 388.370 GiB, 18.80% gc time)
+# # #   285.556929 seconds (206.22 M allocations: 388.371 GiB, 17.04% gc time)
+# # #   291.471662 seconds (205.96 M allocations: 388.068 GiB, 19.85% gc time)
+
+# # # 495.814341 seconds (284.62 M allocations: 392.622 GiB, 12.56% gc time, 10.96% compilation time: 0% of which was recompilation)
+# # # 361.530617 seconds (206.36 M allocations: 388.526 GiB, 14.98% gc time)
+# # # 348.576065 seconds (206.22 M allocations: 388.337 GiB, 15.01% gc time)
+# # # 374.575609 seconds (206.45 M allocations: 388.586 GiB, 14.65% gc time)
+# # # 314.223008 seconds (206.23 M allocations: 388.411 GiB, 14.63% gc time)
+
+# # PROBLEM-3 LOTKA VOLTERRA EXAMPLE [WIP] (WITH PARAMETER ESTIMATION)(will be put in tutorial page)
+# function lotka_volterra(u, p, t)
+#     # Model parameters.
+#     α, β, γ, δ = p
+#     # Current state.
+#     x, y = u
+
+#     # Evaluate differential equations.
+#     dx = (α - β * y) * x # prey
+#     dy = (δ * x - γ) * y # predator
+
+#     return [dx, dy]
+# end
+
+# u0 = [1.0, 1.0]
+# p = [1.5, 1.0, 3.0, 1.0]
+# tspan = (0.0, 6.0)
+# prob = ODEProblem(lotka_volterra, u0, tspan, p)
+# solution = solve(prob, Tsit5(); saveat = 0.05)
+
+# as = reduce(hcat, solution.u)
+# as[1, :]
+# # Plot simulation.
+# time = solution.t
+# u = hcat(solution.u...)
+# # BPINN AND TRAINING DATASET CREATION, NN create, Reconstruct
+# x = u[1, :] + 0.5 * randn(length(u[1, :]))
+# y = u[2, :] + 0.5 * randn(length(u[1, :]))
+# dataset = [x[1:50], y[1:50], time[1:50]]
+# # scatter!(time, [x, y])
+# # scatter!(dataset[3], [dataset[2], dataset[1]])
+
+# # NN has 2 outputs as u -> [dx,dy]
+# chainlux1 = Lux.Chain(Lux.Dense(1, 6, Lux.tanh), Lux.Dense(6, 6, Lux.tanh),
+#     Lux.Dense(6, 2))
+# chainflux1 = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 6, tanh), Flux.Dense(6, 2))
+
+# # fh_mcmc_chainflux1, fhsamplesflux1, fhstatsflux1 = ahmc_bayesian_pinn_ode(prob, chainflux1,
+# #                                                                           dataset = dataset,
+# #                                                                           draw_samples = 1000,
+# #                                                                           l2std = [
+# #                                                                               0.05,
+# #                                                                               0.05,
+# #                                                                           ],
+# #                                                                           phystd = [
+# #                                                                               0.05,
+# #                                                                               0.05,
+# #                                                                           ],
+# #                                                                           priorsNNw = (0.0,
+# #          
+
+# #   3.0))
+
+# # check if NN output is more than 1
+# # numoutput = size(luxar[1])[1]
+# # if numoutput > 1
+# #     # Initialize a vector to store the separated outputs for each output dimension
+# #     output_matrices = [Vector{Vector{Float32}}() for _ in 1:numoutput]
+
+# #     # Loop through each element in the `as` vector
+# #     for element in as
+# #         for i in 1:numoutput
+# #             push!(output_matrices[i], element[i, :])  # Append the i-th output (i-th row) to the i-th output_matrices
+# #         end
+# #     end
+
+# #     ensemblecurves = Vector{}[]
+# #     for r in 1:numoutput
+# #         br = hcat(output_matrices[r]...)'
+# #         ensemblecurve = prob.u0[r] .+
+# #                         [Particles(br[:, i]) for i in 1:length(t)] .*
+# #                         (t .- prob.tspan[1])
+# #         push!(ensemblecurves, ensemblecurve)
+# #     end
+
+# # else
+# #     # ensemblecurve = prob.u0 .+
+# #     #                 [Particles(reduce(vcat, luxar)[:, i]) for i in 1:length(t)] .*
+# #     #                 (t .- prob.tspan[1])
+# #     print("yuh")
+# # end
+
+# # fhsamplesflux2
+# # nnparams = length(init1)
+# # estimnnparams = [Particles(reduce(hcat, fhsamplesflux2)[i, :]) for i in 1:nnparams]
+# # ninv=4
+# # estimated_params = [Particles(reduce(hcat, fhsamplesflux2[(end - ninv + 1):end])[i, :])
+# #                     for i in (nnparams + 1):(nnparams + ninv)]
+# # output_matrices[r]
+# # br = hcat(output_matrices[r]...)'
+
+# # br[:, 1]
+
+# # [Particles(br[:, i]) for i in 1:length(t)]
+# # prob.u0
+# # [Particles(br[:, i]) for i in 1:length(t)] .*
+# # (t .- prob.tspan[1])
+
+# # ensemblecurve = prob.u0[r] .+
+# #                 [Particles(br[:, i]) for i in 1:length(t)] .*
+# #                 (t .- prob.tspan[1])
+# # push!(ensemblecurves, ensemblecurve)
+
+# using StatsPlots
+# plotly()
+# plot(t, ensemblecurve)
+# plot(t, ensemblecurves[1])
+# plot!(t, ensemblecurves[2])
+# ensemblecurve
+# ensemblecurves[1]
+# fh_mcmc_chainflux2, fhsamplesflux2, fhstatsflux2 = ahmc_bayesian_pinn_ode(prob, chainflux1,
+#     dataset = dataset,
+#     draw_samples = 1000,
+#     l2std = [
+#         0.05,
+#         0.05,
+#     ],
+#     phystd = [
+#         0.05,
+#         0.05,
+#     ],
+#     priorsNNw = (0.0,
+#         3.0),
+#     param = [
+#         Normal(1.5,
+#             0.5),
+#         Normal(1.2,
+#             0.5),
+#         Normal(3.3,
+#             0.5),
+#         Normal(1.4,
+#             0.5),
+#     ], progress = true)
+
+# alg = NeuralPDE.BNNODE(chainflux1,
+#     dataset = dataset,
+#     draw_samples = 1000,
+#     l2std = [
+#         0.05,
+#         0.05,
+#     ],
+#     phystd = [
+#         0.05,
+#         0.05,
+#     ],
+#     priorsNNw = (0.0,
+#         3.0),
+#     param = [
+#         Normal(4.5,
+#             5),
+#         Normal(7,
+#             2),
+#         Normal(5,
+#             2),
+#         Normal(-4,
+#             6),
+#     ],
+#     n_leapfrog = 30, progress = true)
+
+# sol3flux_pestim = solve(prob, alg)
+
+# # OG PARAM VALUES
+# [1.5, 1.0, 3.0, 1.0]
+# # less
+# # [1.34, 7.51, 2.54, -2.55]
+# # better
+# # [1.48, 0.993, 2.77, 0.954]
+
+# sol3flux_pestim.es
+# sol3flux_pestim.estimated_ode_params
+# # fh_mcmc_chainlux1, fhsampleslux1, fhstatslux1 = ahmc_bayesian_pinn_ode(prob, chainlux1,
+# #                                                                        dataset = dataset,
+# #                                                                        draw_samples = 1000,
+# #                                                                        l2std = [0.05, 0.05],
+# #                                                                        phystd = [
+# #                                                                            0.05,
+# #                                                                            0.05,
+# #                                                                        ],
+# #                                                                        priorsNNw = (0.0,
+# #                                                                                     3.0))
+
+# # fh_mcmc_chainlux2, fhsampleslux2, fhstatslux2 = ahmc_bayesian_pinn_ode(prob, chainlux1,
+# #                                                                        dataset = dataset,
+# #                                                                        draw_samples = 1000,
+# #                                                                        l2std = [0.05, 0.05],
+# #                                                                        phystd = [
+# #                                                                            0.05,
+# #                                                                            0.05,
+# #                                                                        ],
+# #                                                                        priorsNNw = (0.0,
+# #                                                                                     3.0),
+# #                                                                        param = [
+# #                                                                            Normal(1.5, 0.5),
+# #                                                                            Normal(1.2, 0.5),
+# #                                                                            Normal(3.3, 0.5),
+# #                                                                            Normal(1.4, 0.5),
+# #                                                                        ])
+
+# init1, re1 = destructure(chainflux1)
+# θinit, st = Lux.setup(Random.default_rng(), chainlux1)
+# #   PLOT testing points
+# t = time
+# p = prob.p
+# collect(Float64, vcat(ComponentArrays.ComponentArray(θinit)))
+# collect(Float64, ComponentArrays.ComponentArray(θinit))
+# # Mean of last 1000 sampled parameter's curves(flux and lux chains)[Ensemble predictions]
+# out = re1.([fhsamplesflux1[i][1:68] for i in 500:1000])
+# yu = [out[i](t') for i in eachindex(out)]
+
+# function getensemble(yu, num_models)
+#     num_rows, num_cols = size(yu[1])
+#     row_means = zeros(Float32, num_rows, num_cols)
+#     for i in 1:num_models
+#         row_means .+= yu[i]
+#     end
+#     row_means ./ num_models
+# end
+# fluxmean = getensemble(yu, length(out))
+# meanscurve1_1 = prob.u0 .+ (t' .- prob.tspan[1]) .* fluxmean
+# mean(abs.(u .- meanscurve1_1))
+
+# plot!(t, physsol1)
+# @test mean(abs2.(x̂ .- meanscurve1_1)) < 2e-2
+# @test mean(abs2.(physsol1 .- meanscurve1)) < 2e-2
+# @test mean(abs2.(x̂ .- meanscurve2)) < 3e-3
+# @test mean(abs2.(physsol1 .- meanscurve2)) < 2e-3
+
+# out = re1.([fhsamplesflux2[i][1:68] for i in 500:1000])
+# yu = collect(out[i](t') for i in eachindex(out))
+# fluxmean = getensemble(yu, length(out))
+# meanscurve1_2 = prob.u0 .+ (t' .- prob.tspan[1]) .* fluxmean
+# mean(abs.(u .- meanscurve1_2))
+
+# @test mean(abs2.(x̂ .- meanscurve1)) < 2e-2
+# @test mean(abs2.(physsol1 .- meanscurve1)) < 2e-2
+# @test mean(abs2.(x̂ .- meanscurve2)) < 3e-3
+# @test mean(abs2.(physsol1 .- meanscurve2)) < 2e-3
+
+# θ = [vector_to_parameters(fhsampleslux1[i][1:(end - 4)], θinit) for i in 500:1000]
+# luxar = [chainlux1(t', θ[i], st)[1] for i in 1:500]
+# luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
+# meanscurve2_1 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
+
+# @test mean(abs2.(x̂ .- meanscurve1)) < 2e-2
+# @test mean(abs2.(physsol1 .- meanscurve1)) < 2e-2
+# @test mean(abs2.(x̂ .- meanscurve2)) < 3e-3
+# @test mean(abs2.(physsol1 .- meanscurve2)) < 2e-3
+
+# θ = [vector_to_parameters(fhsampleslux2[i][1:(end - 4)], θinit) for i in 500:1000]
+# luxar = [chainlux1(t', θ[i], st)[1] for i in 1:500]
+# luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
+# meanscurve2_2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
+
+# @test mean(abs2.(x̂ .- meanscurve1)) < 2e-2
+# @test mean(abs2.(physsol1 .- meanscurve1)) < 2e-2
+# @test mean(abs2.(x̂ .- meanscurve2)) < 3e-3
+# @test mean(abs2.(physsol1 .- meanscurve2)) < 2e-3
+
+# # # ESTIMATED ODE PARAMETERS (NN1 AND NN2)
+# @test abs(p - mean([fhsamplesflux2[i][69] for i in 500:1000])) < 0.1 * p[1]
+# @test abs(p - mean([fhsampleslux2[i][69] for i in 500:1000])) < 0.2 * p[1]
+
+# # @test abs(p - mean([fhsamplesflux2[i][70] for i in 500:1000])) < 0.1 * p[2]
+# # @test abs(p - mean([fhsampleslux2[i][70] for i in 500:1000])) < 0.2 * p[2]
+
+# # @test abs(p - mean([fhsamplesflux2[i][71] for i in 500:1000])) < 0.1 * p[3]
+# # @test abs(p - mean([fhsampleslux2[i][71] for i in 500:1000])) < 0.2 * p[3]
+
+# # @test abs(p - mean([fhsamplesflux2[i][72] for i in 500:1000])) < 0.1 * p[4]
+# # @test abs(p - mean([fhsampleslux2[i][72] for i in 500:1000])) < 0.2 * p[4]
+
+# # fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chainlux1,
+# #                                                               dataset = dataset,
+# #                                                               draw_samples = 1000,
+# #                                                               l2std = [0.05, 0.05],
+# #                                                               phystd = [0.05, 0.05],
+# #                                                               priorsNNw = (0.0, 3.0),
+# #                                                               param = [
+# #                                                                   Normal(1.5, 0.5),
+# #                                                                   Normal(1.2, 0.5),
+# #                                                                   Normal(3.3, 0.5),
+# #                                                                   Normal(1.4, 0.5),
+# #                                                               ], autodiff = true)
+
+# # fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chainlux1,
+# #                                                               dataset = dataset,
+# #                                                               draw_samples = 1000,
+# #                                                               l2std = [0.05, 0.05],
+# #                                                               phystd = [0.05, 0.05],
+# #                                                               priorsNNw = (0.0, 3.0),
+# #                                                               param = [
+# #                                                                   Normal(1.5, 0.5),
+# #                                                                   Normal(1.2, 0.5),
+# #                                                                   Normal(3.3, 0.5),
+# #                                                                   Normal(1.4, 0.5),
+# #                                                               ], nchains = 2)
+
+# # NOTES (WILL CLEAR LATER)
+# # --------------------------------------------------------------------------------------------
+# # Hamiltonian energy must be lowest(more paramters the better is it to map onto them)
+# # full better than L2 and phy individual(test)
+# # in mergephys more points after training points is better from 20->40
+# # does consecutive runs bceome better? why?(plot 172)(same chain maybe)
+# # does density of points in timespan matter dataset vs internal timespan?(plot 172)(100+0.01)
+# # when training from 0-1 and phys from 1-5 with 1/150 simple nn slow,but bigger nn faster decrease in Hmailtonian
+# # bigger time interval more curves to adapt to only more parameters adapt to that, better NN architecture
+# # higher order logproblems solve better
+# # repl up up are same instances? but reexecute calls are new?
+
+# #Compare results against paper example
+# # Lux chains support (DONE)
+# # fix predictions for odes depending upon 1,p in f(u,p,t)(DONE)
+# # lotka volterra learn curve beyond l2 losses(L2 losses determine accuracy of parameters)(parameters cant run free ∴ L2 interval only)
+# # check if prameters estimation works(YES)
+# # lotka volterra parameters estimate (DONE)
+
+# using NeuralPDE, Lux, Flux, Optimization, OptimizationOptimJL
+# import ModelingToolkit: Interval
+# using Plots, StatsPlots
+# plotly()
+# # Profile.init()
+
+# @parameters x y
+# @variables u(..)
+# Dxx = Differential(x)^2
+# Dyy = Differential(y)^2
+
+# # 2D PDE
+# eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y)
+
+# # Boundary conditions
+# bcs = [u(0, y) ~ 0.0, u(1, y) ~ 0.0,
+#     u(x, 0) ~ 0.0, u(x, 1) ~ 0.0]
+# # Space and time domains
+# domains = [x ∈ Interval(0.0, 1.0),
+#     y ∈ Interval(0.0, 1.0)]
+
+# # Neural network
+# dim = 2 # number of dimensions
+# chain = Flux.Chain(Flux.Dense(dim, 16, Lux.σ), Flux.Dense(16, 16, Lux.σ), Flux.Dense(16, 1))
+# θ, re = destructure(chain)
+# # Discretization
+# dx = 0.05
+# discretization = PhysicsInformedNN(chain, GridTraining(dx))
+
+# @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
+
+# pinnrep = symbolic_discretize(pde_system, discretization)
+# typeof(pinnrep.phi)
+# typeof(pinnrep.phi)
+# typeof(re)
+# pinnrep.phi([1, 2], θ)
+
+# typeof(θ)
+
+# print(pinnrep)
+# pinnrep.eqs
+# pinnrep.bcs
+# pinnrep.domains
+# pinnrep.eq_params
+# pinnrep.defaults
+# print(pinnrep.default_p)
+# pinnrep.param_estim
+# print(pinnrep.additional_loss)
+# pinnrep.adaloss
+# pinnrep.depvars
+# pinnrep.indvars
+# pinnrep.dict_depvar_input
+# pinnrep.dict_depvars
+# pinnrep.dict_indvars
+# print(pinnrep.logger)
+# pinnrep.multioutput
+# pinnrep.iteration
+# pinnrep.init_params
+# pinnrep.flat_init_params
+# pinnrep.phi
+# pinnrep.derivative
+# pinnrep.strategy
+# pinnrep.pde_indvars
+# pinnrep.bc_indvars
+# pinnrep.pde_integration_vars
+# pinnrep.bc_integration_vars
+# pinnrep.integral
+# pinnrep.symbolic_pde_loss_functions
+# pinnrep.symbolic_bc_loss_functions
+# pinnrep.loss_functions
+
+# #  = discretize(pde_system, discretization)
+# prob = symbolic_discretize(pde_system, discretization)
+# # "The boundary condition loss functions"
+# sum([prob.loss_functions.bc_loss_functions[i](θ) for i in eachindex(1:4)])
+# sum([prob.loss_functions.pde_loss_functions[i](θ) for i in eachindex(1)])
+
+# prob.loss_functions.full_loss_function(θ, 32)
+
+# prob.loss_functions.bc_loss_functions[1](θ)
+
+# prob.loss_functions.bc_loss_functions
+# prob.loss_functions.full_loss_function
+# prob.loss_functions.additional_loss_function
+# prob.loss_functions.pde_loss_functions
+
+# 1.3953060473003345 + 1.378102161087438 + 1.395376727128639 + 1.3783868705075002 +
+# 0.22674532775196876
+# # "The PDE loss functions"
+# prob.loss_functions.pde_loss_functions
+# prob.loss_functions.pde_loss_functions[1](θ)
+# # "The full loss function, combining the PDE and boundary condition loss functions.This is the loss function that is used by the optimizer."
+# prob.loss_functions.full_loss_function(θ, nothing)
+# prob.loss_functions.full_loss_function(θ, 423423)
+
+# # "The wrapped `additional_loss`, as pieced together for the optimizer."
+# prob.loss_functions.additional_loss_function
+# # "The pre-data version of the PDE loss function"
+# prob.loss_functions.datafree_pde_loss_functions
+# # "The pre-data version of the BC loss function"
+# prob.loss_functions.datafree_bc_loss_functions
+
+# using Random
+# θ, st = Lux.setup(Random.default_rng(), chain)
+# #Optimizer
+# opt = OptimizationOptimJL.BFGS()
+
+# #Callback function
+# callback = function (p, l)
+#     println("Current loss is: $l")
+#     return false
+# end
+
+# res = Optimization.solve(prob, opt, callback = callback, maxiters = 1000)
+# phi = discretization.phi
+
+# # ------------------------------------------------
+# using NeuralPDE, Lux, ModelingToolkit, Optimization, OptimizationOptimJL, OrdinaryDiffEq,
+#       Plots
+# import ModelingToolkit: Interval, infimum, supremum
+# @parameters t, σ_, β, ρ
+# @variables x(..), y(..), z(..)
+# Dt = Differential(t)
+# eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
+#     Dt(y(t)) ~ x(t) * (ρ - z(t)) - y(t),
+#     Dt(z(t)) ~ x(t) * y(t) - β * z(t)]
+
+# bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
+# domains = [t ∈ Interval(0.0, 1.0)]
+# dt = 0.01
+
+# input_ = length(domains)
+# n = 8
+# chain1 = Lux.Chain(Lux.Dense(input_, n, Lux.σ), Lux.Dense(n, n, Lux.σ),
+#                    Lux.Dense(n, n, Lux.σ),
+#                    Lux.Dense(n, 1))
+# chain2 = Lux.Chain(Lux.Dense(input_, n, Lux.σ), Lux.Dense(n, n, Lux.σ),
+#                    Lux.Dense(n, n, Lux.σ),
+#                    Lux.Dense(n, 1))
+# chain3 = Lux.Chain(Lux.Dense(input_, n, Lux.σ), Lux.Dense(n, n, Lux.σ),
+#                    Lux.Dense(n, n, Lux.σ),
+#                    Lux.Dense(n, 1))
+
+# function lorenz!(du, u, p, t)
+#     du[1] = 10.0 * (u[2] - u[1])
+#     du[2] = u[1] * (28.0 - u[3]) - u[2]
+#     du[3] = u[1] * u[2] - (8 / 3) * u[3]
+# end
+
+# u0 = [1.0; 0.0; 0.0]
+# tspan = (0.0, 1.0)
+# prob = ODEProblem(lorenz!, u0, tspan)
+# sol = solve(prob, Tsit5(), dt = 0.1)
+# ts = [infimum(d.domain):dt:supremum(d.domain) for d in domains][1]
+# function getData(sol)
+#     data = []
+#     us = hcat(sol(ts).u...)
+#     ts_ = hcat(sol(ts).t...)
+#     return [us, ts_]
+# end
+# data = getData(sol)
+
+# (u_, t_) = data
+# len = length(data[2])
+
+# depvars = [:x, :y, :z]
+# function additional_loss(phi, θ, p)
+#     return sum(sum(abs2, phi[i](t_, θ[depvars[i]]) .- u_[[i], :]) / len for i in 1:1:3)
+# end
+
+# discretization = NeuralPDE.PhysicsInformedNN([chain1, chain2, chain3],
+#                                              NeuralPDE.GridTraining(dt),
+#                                              param_estim = false,
+#                                              additional_loss = additional_loss)
+# @named pde_system = PDESystem(eqs, bcs, domains, [t], [x(t), y(t), z(t)], [σ_, ρ, β],
+#                               defaults = Dict([p .=> 1.0 for p in [σ_, ρ, β]]))
+# prob = NeuralPDE.discretize(pde_system, discretization)
+# callback = function (p, l)
+#     println("Current loss is: $l")
+#     return false
+# end
+# res = Optimization.solve(prob, BFGS(); callback = callback, maxiters = 5000)
+# p_ = res.u[(end - 2):end] # p_ = [9.93, 28.002, 2.667]
+
+# minimizers = [res.u.depvar[depvars[i]] for i in 1:3]
+# ts = [infimum(d.domain):(dt / 10):supremum(d.domain) for d in domains][1]
+# u_predict = [[discretization.phi[i]([t], minimizers[i])[1] for t in ts] for i in 1:3]
+# plot(sol)
+# plot!(ts, u_predict, label = ["x(t)" "y(t)" "z(t)"])
+
+# discretization.multioutput
+# discretization.chain
+# discretization.strategy
+# discretization.init_params
+# discretization.phi
+# discretization.derivative
+# discretization.param_estim
+# discretization.additional_loss
+# discretization.adaptive_loss
+# discretization.logger
+# discretization.log_options
+# discretization.iteration
+# discretization.self_increment
+# discretization.multioutput
+# discretization.kwargs
+
+# struct BNNODE1{P <: Vector{<:Distribution}}
+#     chain::Any
+#     Kernel::Any
+#     draw_samples::UInt32
+#     priorsNNw::Tuple{Float64, Float64}
+#     param::P
+#     l2std::Vector{Float64}
+#     phystd::Vector{Float64}
+
+#     function BNNODE1(chain, Kernel; draw_samples = 2000, priorsNNw = (0.0, 3.0), param = [],
+#                      l2std = [0.05], phystd = [0.05])
+#         BNNODE1(chain, Kernel, draw_samples, priorsNNw, param, l2std, phystd)
+#     end
+# end
+
+# struct BNNODE3{C, K, P <: Union{Any, Vector{<:Distribution}}}
+#     chain::C
+#     Kernel::K
+#     draw_samples::UInt32
+#     priorsNNw::Tuple{Float64, Float64}
+#     param::P
+#     l2std::Vector{Float64}
+#     phystd::Vector{Float64}
+
+#     function BNNODE3(chain, Kernel; draw_samples = 2000, priorsNNw = (0.0, 3.0), param = [],
+#                      l2std = [0.05], phystd = [0.05])
+#         new{typeof(chain), typeof(Kernel), typeof(param)}(chain, Kernel, draw_samples,
+#                                                           priorsNNw, param, l2std, phystd)
+#     end
+# end
+# linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π)
+# linear = (u, p, t) -> cos(2 * π * t)
+# tspan = (0.0, 2.0)
+# u0 = 0.0
+# prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan)
+
+# ta = range(tspan[1], tspan[2], length = 300)
+# u = [linear_analytic(u0, nothing, ti) for ti in ta]
+# sol1 = solve(prob, Tsit5())
+
+# # BPINN AND TRAINING DATASET CREATION, NN create, Reconstruct
+# x̂ = collect(Float64, Array(u) + 0.02 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂[1:100], time[1:100]]
+
+# # Call BPINN, create chain
+# chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1))
+# chainlux = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1))
+# HMC
+# solve(prob, BNNODE(chainflux, HMC))
+# BNNODE1(chainflux, HMC, 2000)
+
+# draw_samples = 2000
+# priorsNNw = (0.0, 3.0)
+# param = []
+# l2std = [0.05]
+# phystd = [0.05]
+# @time BNNODE3(chainflux, HMC, draw_samples = 2000, priorsNNw = (0.0, 3.0),
+#               param = [nothing],
+#               l2std = [0.05], phystd = [0.05])
+# typeof(Nothing) <: Vector{<:Distribution}
+# Nothing <: Distribution
+# {UnionAll} <: Distribution
+# @time [Nothing]
+# typeof([Nothing])
+# @time [1]
+
+# function test1(sum; c = 23, d = 32)
+#     return sum + c + d
+# end
+# function test(a, b; c, d)
+#     return test1(a + b, c, d)
+# end
+
+# test(2, 2)
+
+# struct BNNODE3{C, K, P <: Union{Vector{Nothing}, Vector{<:Distribution}}}
+#     chain::C
+#     Kernel::K
+#     draw_samples::Int64
+#     priorsNNw::Tuple{Float64, Float64}
+#     param::P
+#     l2std::Vector{Float64}
+#     phystd::Vector{Float64}
+
+#     function BNNODE3(chain, Kernel; draw_samples,
+#                      priorsNNw, param = [nothing], l2std, phystd)
+#         new{typeof(chain), typeof(Kernel), typeof(param)}(chain,
+#                                                           Kernel,
+#                                                           draw_samples,
+#                                                           priorsNNw,
+#                                                           param, l2std,
+#                                                           phystd)
+#     end
+# end
+
+# function solve1(prob::DiffEqBase.AbstractODEProblem, alg::BNNODE3;
+#                 dataset = [nothing], dt = 1 / 20.0,
+#                 init_params = nothing, nchains = 1,
+#                 autodiff = false, Integrator = Leapfrog,
+#                 Adaptor = StanHMCAdaptor, targetacceptancerate = 0.8,
+#                 Metric = DiagEuclideanMetric, jitter_rate = 3.0,
+#                 tempering_rate = 3.0, max_depth = 10, Δ_max = 1000,
+#                 n_leapfrog = 10, δ = 0.65, λ = 0.3, progress = true,
+#                 verbose = false)
+#     chain = alg.chain
+#     l2std = alg.l2std
+#     phystd = alg.phystd
+#     priorsNNw = alg.priorsNNw
+#     Kernel = alg.Kernel
+#     draw_samples = alg.draw_samples
+
+#     param = alg.param == [nothing] ? [] : alg.param
+#     mcmcchain, samples, statistics = ahmc_bayesian_pinn_ode(prob, chain, dataset = dataset,
+#                                                             draw_samples = draw_samples,
+#                                                             init_params = init_params,
+#                                                             physdt = dt, l2std = l2std,
+#                                                             phystd = phystd,
+#                                                             priorsNNw = priorsNNw,
+#                                                             param = param,
+#                                                             nchains = nchains,
+#                                                             autodiff = autodiff,
+#                                                             Kernel = Kernel,
+#                                                             Integrator = Integrator,
+#                                                             Adaptor = Adaptor,
+#                                                             targetacceptancerate = targetacceptancerate,
+#                                                             Metric = Metric,
+#                                                             jitter_rate = jitter_rate,
+#                                                             tempering_rate = tempering_rate,
+#                                                             max_depth = max_depth,
+#                                                             Δ_max = Δ_max,
+#                                                             n_leapfrog = n_leapfrog, δ = δ,
+#                                                             λ = λ, progress = progress,
+#                                                             verbose = verbose)
+# end
+
+# linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π)
+# linear = (u, p, t) -> cos(2 * π * t)
+# tspan = (0.0, 2.0)
+# u0 = 0.0
+# prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan)
+
+# ta = range(tspan[1], tspan[2], length = 300)
+# u = [linear_analytic(u0, nothing, ti) for ti in ta]
+# # sol1 = solve(prob, Tsit5())
+
+# # BPINN AND TRAINING DATASET CREATION, NN create, Reconstruct
+# x̂ = collect(Float64, Array(u) + 0.02 * randn(size(u)))
+# time = vec(collect(Float64, ta))
+# dataset = [x̂[1:100], time[1:100]]
+
+# # Call BPINN, create chain
+# chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1))
+# chainlux = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1))
+# HMC
+
+# solve1(prob, a)
+# a = BNNODE3(chainflux, HMC, draw_samples = 2000,
+#             priorsNNw = (0.0, 3.0),
+#             l2std = [0.05], phystd = [0.05])
+
+# Define Lotka-Volterra model.
+function lotka_volterra1(u, p, t)
+    # Model parameters.
+    α, β, γ, δ = p
+    # Current state.
+    x, y = u
+
+    # Evaluate differential equations.
+    dx = (α - β * y) * x # prey
+    dy = (δ * x - γ) * y # predator
+
+    return [dx, dy]
+end
+
+u0 = [1.0, 1.0]
+p = [1.5, 1.0, 3.0, 1.0]
+tspan = (0.0, 6.0)
+prob = ODEProblem(lotka_volterra1, u0, tspan, p)
+solution = solve(prob, Tsit5(); saveat = 0.05)
+
+as = reduce(hcat, solution.u)
+as[1, :]
+# Plot simulation.
+time = solution.t
+u = hcat(solution.u...)
+# BPINN AND TRAINING DATASET CREATION, NN create, Reconstruct
+x = u[1, :] + 0.5 * randn(length(u[1, :]))
+y = u[2, :] + 0.5 * randn(length(u[1, :]))
+dataset = [x[1:50], y[1:50], time[1:50]]
+# scatter!(time, [x, y])
+# scatter!(dataset[3], [dataset[2], dataset[1]])
+
+# NN has 2 outputs as u -> [dx,dy]
+chainlux1 = Lux.Chain(Lux.Dense(1, 6, Lux.tanh), Lux.Dense(6, 6, Lux.tanh),
+    Lux.Dense(6, 2))
+chainflux1 = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 6, tanh), Flux.Dense(6, 2))
+
+fh_mcmc_chainflux1, fhsamplesflux1, fhstatsflux1 = ahmc_bayesian_pinn_ode(prob, chainflux1,
+    dataset = dataset,
+    draw_samples = 1000,
+    l2std = [
+        0.05,
+        0.05,
+    ],
+    phystd = [
+        0.05,
+        0.05,
+    ],
+    priorsNNw = (0.0, 3.0), progress = true)
+ahmc_bayesian_pinn_ode(prob, chainflux1,
+    dataset = dataset,
+    draw_samples = 1000,
+    l2std = [
+        0.05,
+        0.05,
+    ],
+    phystd = [
+        0.05,
+        0.05,
+    ],
+    priorsNNw = (0.0, 3.0), progress = true)
+
+#     2×171 Matrix{Float64}:
+#  -0.5  -0.518956  -0.529639  …  -1.00266  -1.01049
+#   2.0   1.97109    1.92747       0.42619   0.396335
+
+#     2-element Vector{Float64}:
+#  -119451.94949911036
+#  -128543.23714618056
+
+# alg = NeuralPDE.BNNODE(chainflux1,
+#     dataset = dataset,
+#     draw_samples = 1000,
+#     l2std = [
+#         0.05,
+#         0.05,
+#     ],
+#     phystd = [
+#         0.05,
+#         0.05,
+#     ],
+#     priorsNNw = (0.0,
+#         3.0),
+#     param = [
+#         Normal(4.5,
+#             5),
+#         Normal(7,
+#             2),
+#         Normal(5,
+#             2),
+#         Normal(-4,
+#             6),
+#     ],
+#     n_leapfrog = 30, progress = true)
+
+# sol3flux_pestim = solve(prob, alg)
+
+#  ----------------------------------------------
+# original paper implementation
+# 25 points 
+run1  #7.70593 Particles{Float64, 1}
+run2 #6.66347 Particles{Float64, 1} 
+run3 #6.84827 Particles{Float64, 1} 
+
+# 50 points 
+run1 #7.83577 Particles{Float64, 1}
+run2 #6.49477 Particles{Float64, 1}
+run3 #6.47421 Particles{Float64, 1}
+
+# 100 points 
+run1 #5.96604 Particles{Float64, 1}
+run2 #6.05432 Particles{Float64, 1}
+run3 #6.08856 Particles{Float64, 1}
+
+# Full likelihood(uses total variation regularized differentiation) 
+# 25 points 
+run1 #6.41722 Particles{Float64, 1}
+run2 #6.42782 Particles{Float64, 1}
+run3 #6.42782 Particles{Float64, 1}
+
+# 50 points
+run1 #5.71268 Particles{Float64, 1}
+run2 #5.74599 Particles{Float64, 1}
+run3 #5.74599 Particles{Float64, 1}
+
+# 100 points  
+run1 #6.59097 Particles{Float64, 1}
+run2 #6.62813 Particles{Float64, 1}
+run3 #6.62813 Particles{Float64, 1}
+
+using Plots, StatsPlots
+function lotka_volterra(u, p, t)
+    # Model parameters.
+    α, β, γ, δ = p
+    # Current state.
+    x, y = u
+
+    # Evaluate differential equations.
+    dx = (α - β * y) * x # prey
+    dy = (δ * x - γ) * y # predator
+
+    return [dx, dy]
+end
+
+# initial-value problem.
+u0 = [1.0, 1.0]
+p = [1.5, 1.0, 3.0, 1.0]
+tspan = (0.0, 6.0)
+prob = ODEProblem(lotka_volterra, u0, tspan, p)
+
+# Plot simulation.
+
+solution = solve(prob, Tsit5(); saveat = 0.05)
+plot(solve(prob, Tsit5()))
+
+# Dataset creation for parameter estimation
+time = solution.t
+u = hcat(solution.u...)
+x = u[1, :] + 0.5 * randn(length(u[1, :]))
+y = u[2, :] + 0.5 * randn(length(u[1, :]))
+dataset = [x, y, time]
+
+# Neural Networks must have 2 outputs as u -> [dx,dy] in function lotka_volterra()
+chainflux = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 6, tanh), Flux.Dense(6, 2)) |>
+            Flux.f64
+
+chainlux = Lux.Chain(Lux.Dense(1, 6, Lux.tanh), Lux.Dense(6, 6, Lux.tanh), Lux.Dense(6, 2))
+
+alg1 = NeuralPDE.BNNODE(chainflux,
+    dataset = dataset,
+    draw_samples = 1000,
+    l2std = [
+        0.01,
+        0.01,
+    ],
+    phystd = [
+        0.01,
+        0.01,
+    ],
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(1.5,
+            0.5),
+        LogNormal(1.2,
+            0.5),
+        LogNormal(3.3,
+            1),
+        LogNormal(1.4,
+            1)],
+    n_leapfrog = 30, progress = true)
+
+sol_flux_pestim = solve(prob, alg1)
+
+# Dataset not needed as we are solving the equation with ideal parameters
+alg2 = NeuralPDE.BNNODE(chainlux,
+    draw_samples = 1000,
+    l2std = [
+        0.05,
+        0.05,
+    ],
+    phystd = [
+        0.05,
+        0.05,
+    ],
+    priorsNNw = (0.0,
+        3.0),
+    n_leapfrog = 30, progress = true)
+
+sol_lux = solve(prob, alg2)
+
+#testing timepoints must match keyword arg `saveat`` timepoints of solve() call
+t = collect(Float64, prob.tspan[1]:(1 / 50.0):prob.tspan[2])
+
+# plotting solution for x,y for chain_flux
+plot(t, sol_flux_pestim.ensemblesol[1])
+plot!(t, sol_flux_pestim.ensemblesol[2])
+
+plot(sol_flux_pestim.ens1mblesol[1])
+plot!(sol_flux_pestim.ensemblesol[2])
+
+# estimated ODE parameters by .estimated_ode_params, weights and biases by .estimated_nn_params
+sol_flux_pestim.estimated_nn_params
+sol_flux_pestim.estimated_ode_params
+
+# plotting solution for x,y for chain_lux
+plot(t, sol_lux.ensemblesol[1])
+plot!(t, sol_lux.ensemblesol[2])
+
+# estimated weights and biases by .estimated_nn_params for chain_lux
+sol_lux.estimated_nn_params
+
+# # ----------------------------------stats-----------------------------
+# #   ----------------------------
+# # -----------------------------
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -3118.0639515039957
+# Sampling 100%|███████████████████████████████| Time: 0:04:47 
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -3118.0639515039957
+# Sampling 100%|███████████████████████████████| Time: 0:03:38 
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -3118.0639515039957
+# Sampling 100%|███████████████████████████████| Time: 0:04:12 
+# #   --------------------------
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -6242.351071278482
+# Sampling 100%|███████████████████████████████| Time: 0:05:09 
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -6242.351071278482
+# Sampling 100%|███████████████████████████████| Time: 0:04:47 
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -6242.351071278482
+# Sampling 100%|███████████████████████████████| Time: 0:04:25 
+# #   --------------
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -12497.32657780532
+# Sampling 100%|███████████████████████████████| Time: 0:06:47 
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -12497.32657780532
+# Sampling 100%|███████████████████████████████| Time: 0:05:54
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2loss2 Logpdf is : -12497.32657780532
+# Sampling 100%|███████████████████████████████| Time: 0:05:46
+# # ------------------------
+# # -----------------------
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -882.2934218498742
+# L2loss2 Logpdf is : -3118.0639515039957
+# Sampling 100%|███████████████████████████████| Time: 0:04:06
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -882.2934218498742
+# L2loss2 Logpdf is : -3118.0639515039957
+# Sampling 100%|███████████████████████████████| Time: 0:03:32
+
+# physics Logpdf is : -15740.509286661572
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -882.2934218498742
+# L2loss2 Logpdf is : -3118.0639515039957
+# Sampling 100%|███████████████████████████████| Time: 0:03:01 
+# # --------------------------
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1411.1717435511828
+# L2loss2 Logpdf is : -6242.351071278482
+# Sampling 100%|███████████████████████████████| Time: 0:04:02
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1411.1717435511828
+# L2loss2 Logpdf is : -6242.351071278482
+# Sampling 100%|███████████████████████████████| Time: 0:04:08
+
+# physics Logpdf is : -18864.79640643607
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -1411.1717435511828
+# L2loss2 Logpdf is : -6242.351071278482
+# Sampling 100%|███████████████████████████████| Time: 0:04:15
+# # ----------------------------
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -3240.067149411982
+# L2loss2 Logpdf is : -12497.32657780532
+# Sampling 100%|███████████████████████████████| Time: 0:05:37
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -3240.067149411982
+# L2loss2 Logpdf is : -12497.32657780532
+# Sampling 100%|███████████████████████████████| Time: 0:06:02
+
+# physics Logpdf is : -25119.77191296288
+# prior Logpdf is : -139.5069300318621
+# L2lossData Logpdf is : -3240.067149411982
+# L2loss2 Logpdf is : -12497.32657780532
+# Sampling 100%|███████████████████████████████| Time: 0:06:13
+
+using NeuralPDE, Lux, ModelingToolkit, Optimization, OptimizationOptimJL
+import ModelingToolkit: Interval, infimum, supremum
+
+using NeuralPDE, Flux, OptimizationOptimisers
+
+function diffeq(u, p, t)
+    u1, u2 = u
+    return [u2, p[1] + p[2] * sin(u1) + p[3] * u2]
+end
+p = [5, -10, -1.7]
+u0 = [-1.0, 7.0]
+tspan = (0.0, 10.0)
+prob = ODEProblem(ODEFunction(diffeq), u0, tspan, p)
+
+chainnew = Flux.Chain(Flux.Dense(1, 8, tanh), Flux.Dense(8, 8, tanh), Flux.Dense(8, 2)) |>
+           Flux.f64
+
+opt = OptimizationOptimisers.Adam(0.1)
+opt = Optimisers.ADAGrad(0.1)
+opt = Optimisers.AdaMax(0.01)
+algnew = NeuralPDE.NNODE(chainnew, opt)
+solution_new = solve(prob, algnew, verbose = true,
+    abstol = 1e-10, maxiters = 7000)
+u = reduce(hcat, solution_new.u)
+plot(solution_new.t, u[1, :])
+plot!(solution_new.t, u[2, :])
+
+algnew = NeuralPDE.BNNODE(chainnew, draw_samples = 200,
+    n_leapfrog = 30, progress = true)
+solution_new = solve(prob, algnew)
+
+@parameters t
+@variables u1(..), u2(..)
+D = Differential(t)
+eq = [D(u1(t)) ~ u2(t),
+    D(u2(t)) ~ 5 - 10 * sin(u1(t)) - 1.7 * u2(t)];
+
+import ModelingToolkit: Interval
+bcs = [u1(0) ~ -1, u2(0) ~ 7]
+domains = [t ∈ Interval(0.0, 10.0)]
+dt = 0.01
+
+input_ = length(domains) # number of dimensions
+n = 16
+chain = [Lux.Chain(Lux.Dense(input_, n, Lux.σ), Lux.Dense(n, n, Lux.σ), Lux.Dense(n, 1))
+         for _ in 1:2]
+
+@named pde_system = PDESystem(eq, bcs, domains, [t], [u1(t), u2(t)])
+
+strategy = NeuralPDE.GridTraining(dt)
+discretization = PhysicsInformedNN(chain, strategy)
+sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization)
+
+pde_loss_functions = sym_prob.loss_functions.pde_loss_functions
+bc_loss_functions = sym_prob.loss_functions.bc_loss_functions
+
+callback = function (p, l)
+    println("loss: ", l)
+    # println("pde_losses: ", map(l_ -> l_(p), pde_loss_functions))
+    # println("bcs_losses: ", map(l_ -> l_(p), bc_loss_functions))
+    return false
+end
+
+loss_functions = [pde_loss_functions; bc_loss_functions]
+
+function loss_function(θ, p)
+    sum(map(l -> l(θ), loss_functions))
+end
+
+f_ = OptimizationFunction(loss_function, Optimization.AutoZygote())
+prob = Optimization.OptimizationProblem(f_, sym_prob.flat_init_params)
+
+res = Optimization.solve(prob,
+    OptimizationOptimJL.BFGS();
+    callback = callback,
+    maxiters = 1000)
+phi = discretization.phi
\ No newline at end of file
diff --git a/test/BPINN_Tests.jl~f5b4f1cb (trying to sync) b/test/BPINN_Tests.jl~f5b4f1cb (trying to sync)
new file mode 100644
index 0000000000..cb0303daf0
--- /dev/null
+++ b/test/BPINN_Tests.jl~f5b4f1cb (trying to sync)	
@@ -0,0 +1,336 @@
+# # Testing Code
+using Test, MCMCChains
+using ForwardDiff, Distributions, OrdinaryDiffEq
+using Flux, OptimizationOptimisers, AdvancedHMC, Lux
+using Statistics, Random, Functors, ComponentArrays
+using NeuralPDE, MonteCarloMeasurements
+
+# note that current testing bounds can be easily further tightened but have been inflated for support for Julia build v1
+# on latest Julia version it performs much better for below tests
+Random.seed!(100)
+
+## PROBLEM-1 (WITHOUT PARAMETER ESTIMATION)
+linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π)
+linear = (u, p, t) -> cos(2 * π * t)
+tspan = (0.0, 2.0)
+u0 = 0.0
+prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan)
+p = prob.p
+
+# Numerical and Analytical Solutions: testing ahmc_bayesian_pinn_ode()
+ta = range(tspan[1], tspan[2], length = 300)
+u = [linear_analytic(u0, nothing, ti) for ti in ta]
+x̂ = collect(Float64, Array(u) + 0.02 * randn(size(u)))
+time = vec(collect(Float64, ta))
+physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# testing points for solve() call must match saveat(1/50.0) arg
+ta0 = range(tspan[1], tspan[2], length = 101)
+u1 = [linear_analytic(u0, nothing, ti) for ti in ta0]
+x̂1 = collect(Float64, Array(u1) + 0.02 * randn(size(u1)))
+time1 = vec(collect(Float64, ta0))
+physsol0_1 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
+
+chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64
+chainlux = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1))
+init1, re1 = destructure(chainflux)
+θinit, st = Lux.setup(Random.default_rng(), chainlux)
+
+fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chainflux,
+    draw_samples = 2500)
+
+fh_mcmc_chain2, fhsamples2, fhstats2 = ahmc_bayesian_pinn_ode(prob, chainlux,
+    draw_samples = 2500)
+
+# can change training strategies by adding this to call (Quadratuer and GridTraining show good results but stochastics sampling techniques perform bad)
+# strategy = QuadratureTraining(; quadrature_alg = QuadGKJL(),
+#     reltol = 1e-6,
+#     abstol = 1e-3, maxiters = 1000,
+#     batch = 0)
+
+alg = NeuralPDE.BNNODE(chainflux, draw_samples = 2500)
+sol1flux = solve(prob, alg)
+
+alg = NeuralPDE.BNNODE(chainlux, draw_samples = 2500)
+sol1lux = solve(prob, alg)
+
+# testing points
+t = time
+# Mean of last 500 sampled parameter's curves(flux and lux chains)[Ensemble predictions]
+out = re1.(fhsamples1[(end - 500):end])
+yu = collect(out[i](t') for i in eachindex(out))
+fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)]
+meanscurve1 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean
+
+θ = [vector_to_parameters(fhsamples1[i], θinit) for i in 2000:2500]
+luxar = [chainlux(t', θ[i], st)[1] for i in 1:500]
+luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
+meanscurve2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
+
+# --------------------- ahmc_bayesian_pinn_ode() call
+@test mean(abs.(x̂ .- meanscurve1)) < 0.05
+@test mean(abs.(physsol1 .- meanscurve1)) < 0.005
+@test mean(abs.(x̂ .- meanscurve2)) < 0.05
+@test mean(abs.(physsol1 .- meanscurve2)) < 0.005
+
+#--------------------- solve() call 
+@test mean(abs.(x̂1 .- sol1flux.ensemblesol[1])) < 0.05
+@test mean(abs.(physsol0_1 .- sol1flux.ensemblesol[1])) < 0.05
+@test mean(abs.(x̂1 .- sol1lux.ensemblesol[1])) < 0.05
+@test mean(abs.(physsol0_1 .- sol1lux.ensemblesol[1])) < 0.05
+
+## PROBLEM-1 (WITH PARAMETER ESTIMATION)
+linear_analytic = (u0, p, t) -> u0 + sin(p * t) / (p)
+linear = (u, p, t) -> cos(p * t)
+tspan = (0.0, 2.0)
+u0 = 0.0
+p = 2 * pi
+prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan, p)
+
+# Numerical and Analytical Solutions
+sol1 = solve(prob, Tsit5(); saveat = 0.01)
+u = sol1.u
+time = sol1.t
+
+# BPINN AND TRAINING DATASET CREATION(dataset must be defined only inside problem timespan!)
+ta = range(tspan[1], tspan[2], length = 100)
+u = [linear_analytic(u0, p, ti) for ti in ta]
+x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+time = vec(collect(Float64, ta))
+dataset = [x̂, time]
+physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+# testing points for solve call(saveat=1/50.0 ∴ at t = collect(eltype(saveat), prob.tspan[1]:saveat:prob.tspan[2] internally estimates)
+ta0 = range(tspan[1], tspan[2], length = 101)
+u1 = [linear_analytic(u0, p, ti) for ti in ta0]
+x̂1 = collect(Float64, Array(u1) + 0.2 * randn(size(u1)))
+time1 = vec(collect(Float64, ta0))
+physsol1_1 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
+
+chainflux1 = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64
+chainlux1 = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1))
+init1, re1 = destructure(chainflux1)
+θinit, st = Lux.setup(Random.default_rng(), chainlux1)
+
+fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chainflux1,
+    dataset = dataset,
+    draw_samples = 2500,
+    physdt = 1 / 50.0,
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(9,
+            0.5),
+    ])
+
+fh_mcmc_chain2, fhsamples2, fhstats2 = ahmc_bayesian_pinn_ode(prob, chainlux1,
+    dataset = dataset,
+    draw_samples = 2500,
+    physdt = 1 / 50.0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)])
+
+alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset,
+    draw_samples = 2500, physdt = 1 / 50.0,
+    priorsNNw = (0.0, 3.0),
+    param = [LogNormal(9, 0.5)])
+
+sol2flux = solve(prob, alg)
+
+alg = NeuralPDE.BNNODE(chainlux1, dataset = dataset,
+    draw_samples = 2500,
+    physdt = 1 / 50.0,
+    priorsNNw = (0.0,
+        3.0),
+    param = [
+        LogNormal(9,
+            0.5),
+    ])
+
+sol2lux = solve(prob, alg)
+
+# testing points
+t = time
+# Mean of last 500 sampled parameter's curves(flux and lux chains)[Ensemble predictions]
+out = re1.([fhsamples1[i][1:22] for i in 2000:2500])
+yu = collect(out[i](t') for i in eachindex(out))
+fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)]
+meanscurve1 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean
+
+θ = [vector_to_parameters(fhsamples2[i][1:(end - 1)], θinit) for i in 2000:2500]
+luxar = [chainlux1(t', θ[i], st)[1] for i in 1:500]
+luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
+meanscurve2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
+
+# --------------------- ahmc_bayesian_pinn_ode() call  
+@test mean(abs.(physsol1 .- meanscurve1)) < 0.15
+@test mean(abs.(physsol1 .- meanscurve2)) < 0.15
+
+# ESTIMATED ODE PARAMETERS (NN1 AND NN2)
+@test abs(p - mean([fhsamples2[i][23] for i in 2000:2500])) < abs(0.35 * p)
+@test abs(p - mean([fhsamples1[i][23] for i in 2000:2500])) < abs(0.35 * p)
+
+#-------------------------- solve() call  
+@test mean(abs.(physsol1_1 .- sol2flux.ensemblesol[1])) < 8e-2
+@test mean(abs.(physsol1_1 .- sol2lux.ensemblesol[1])) < 8e-2
+
+# ESTIMATED ODE PARAMETERS (NN1 AND NN2)
+@test abs(p - sol2flux.estimated_de_params[1]) < abs(0.15 * p)
+@test abs(p - sol2lux.estimated_de_params[1]) < abs(0.15 * p)
+
+## PROBLEM-2
+linear = (u, p, t) -> u / p + exp(t / p) * cos(t)
+tspan = (0.0, 10.0)
+u0 = 0.0
+p = -5.0
+prob = ODEProblem(linear, u0, tspan, p)
+linear_analytic = (u0, p, t) -> exp(t / p) * (u0 + sin(t))
+
+# SOLUTION AND CREATE DATASET
+sol = solve(prob, Tsit5(); saveat = 0.1)
+u = sol.u
+time = sol.t
+x̂ = u .+ (u .* 0.2) .* randn(size(u))
+dataset = [x̂, time]
+t = sol.t
+physsol1 = [linear_analytic(prob.u0, p, t[i]) for i in eachindex(t)]
+
+ta0 = range(tspan[1], tspan[2], length = 501)
+u1 = [linear_analytic(u0, p, ti) for ti in ta0]
+time1 = vec(collect(Float64, ta0))
+physsol2 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
+
+chainflux12 = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 6, tanh),
+    Flux.Dense(6, 1)) |> Flux.f64
+chainlux12 = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh), Lux.Dense(6, 1))
+init1, re1 = destructure(chainflux12)
+θinit, st = Lux.setup(Random.default_rng(), chainlux12)
+
+fh_mcmc_chainflux12, fhsamplesflux12, fhstatsflux12 = ahmc_bayesian_pinn_ode(prob,
+    chainflux12,
+    draw_samples = 1500,
+    l2std = [0.03],
+    phystd = [
+        0.03],
+    priorsNNw = (0.0,
+        10.0))
+
+fh_mcmc_chainflux22, fhsamplesflux22, fhstatsflux22 = ahmc_bayesian_pinn_ode(prob,
+    chainflux12,
+    dataset = dataset,
+    draw_samples = 1500,
+    l2std = [0.03],
+    phystd = [
+        0.03,
+    ],
+    priorsNNw = (0.0,
+        10.0),
+    param = [
+        Normal(-7,
+            4),
+    ])
+
+fh_mcmc_chainlux12, fhsampleslux12, fhstatslux12 = ahmc_bayesian_pinn_ode(prob, chainlux12,
+    draw_samples = 1500,
+    l2std = [0.03],
+    phystd = [0.03],
+    priorsNNw = (0.0,
+        10.0))
+
+fh_mcmc_chainlux22, fhsampleslux22, fhstatslux22 = ahmc_bayesian_pinn_ode(prob, chainlux12,
+    dataset = dataset,
+    draw_samples = 1500,
+    l2std = [0.03],
+    phystd = [0.03],
+    priorsNNw = (0.0,
+        10.0),
+    param = [
+        Normal(-7,
+            4),
+    ])
+
+alg = NeuralPDE.BNNODE(chainflux12,
+    dataset = dataset,
+    draw_samples = 1500,
+    l2std = [0.03],
+    phystd = [
+        0.03,
+    ],
+    priorsNNw = (0.0,
+        10.0),
+    param = [
+        Normal(-7,
+            4),
+    ])
+
+sol3flux_pestim = solve(prob, alg)
+
+alg = NeuralPDE.BNNODE(chainlux12,
+    dataset = dataset,
+    draw_samples = 1500,
+    l2std = [0.03],
+    phystd = [0.03],
+    priorsNNw = (0.0,
+        10.0),
+    param = [
+        Normal(-7,
+            4),
+    ])
+
+sol3lux_pestim = solve(prob, alg)
+
+# testing timepoints
+t = sol.t
+#------------------------------ ahmc_bayesian_pinn_ode() call 
+# Mean of last 500 sampled parameter's curves(flux chains)[Ensemble predictions]
+out = re1.([fhsamplesflux12[i][1:61] for i in 1000:1500])
+yu = [out[i](t') for i in eachindex(out)]
+fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)]
+meanscurve1_1 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean
+
+out = re1.([fhsamplesflux22[i][1:61] for i in 1000:1500])
+yu = [out[i](t') for i in eachindex(out)]
+fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)]
+meanscurve1_2 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean
+
+@test mean(abs.(sol.u .- meanscurve1_1)) < 1e-2
+@test mean(abs.(physsol1 .- meanscurve1_1)) < 1e-2
+@test mean(abs.(sol.u .- meanscurve1_2)) < 5e-2
+@test mean(abs.(physsol1 .- meanscurve1_2)) < 5e-2
+
+# estimated parameters(flux chain)
+param1 = mean(i[62] for i in fhsamplesflux22[1000:1500])
+@test abs(param1 - p) < abs(0.3 * p)
+
+# Mean of last 500 sampled parameter's curves(lux chains)[Ensemble predictions]
+θ = [vector_to_parameters(fhsampleslux12[i], θinit) for i in 1000:1500]
+luxar = [chainlux12(t', θ[i], st)[1] for i in 1:500]
+luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
+meanscurve2_1 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
+
+θ = [vector_to_parameters(fhsampleslux22[i][1:(end - 1)], θinit) for i in 1000:1500]
+luxar = [chainlux12(t', θ[i], st)[1] for i in 1:500]
+luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
+meanscurve2_2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
+
+@test mean(abs.(sol.u .- meanscurve2_1)) < 1e-1
+@test mean(abs.(physsol1 .- meanscurve2_1)) < 1e-1
+@test mean(abs.(sol.u .- meanscurve2_2)) < 5e-2
+@test mean(abs.(physsol1 .- meanscurve2_2)) < 5e-2
+
+# estimated parameters(lux chain)
+param1 = mean(i[62] for i in fhsampleslux22[1000:1500])
+@test abs(param1 - p) < abs(0.3 * p)
+
+#-------------------------- solve() call 
+# (flux chain)
+@test mean(abs.(physsol2 .- sol3flux_pestim.ensemblesol[1])) < 0.15
+# estimated parameters(flux chain)
+param1 = sol3flux_pestim.estimated_de_params[1]
+@test abs(param1 - p) < abs(0.45 * p)
+
+# (lux chain)
+@test mean(abs.(physsol2 .- sol3lux_pestim.ensemblesol[1])) < 0.15
+# estimated parameters(lux chain)
+param1 = sol3lux_pestim.estimated_de_params[1]
+@test abs(param1 - p) < abs(0.45 * p)
\ No newline at end of file

From 16358010638023f76a5d7ae2fa5b50f545a4602e Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Sat, 3 Feb 2024 17:00:05 +0530
Subject: [PATCH 063/107] most of logic done

---
 src/PDE_BPINN.jl              |  28 ++-
 test/BPINN_PDEinvsol_tests.jl | 344 ++++++++++++++++++++++++++++++++++
 2 files changed, 370 insertions(+), 2 deletions(-)
 create mode 100644 test/BPINN_PDEinvsol_tests.jl

diff --git a/src/PDE_BPINN.jl b/src/PDE_BPINN.jl
index c57bcd71cb..81d0abaac8 100644
--- a/src/PDE_BPINN.jl
+++ b/src/PDE_BPINN.jl
@@ -55,6 +55,8 @@ function L2LossData(ltd::PDELogTargetDensity, θ)
 
     # dataset of form Vector[matrix_x, matrix_y, matrix_z]
     # matrix_i is of form [i,indvar1,indvar2,..] (needed in case if heterogenous domains)
+    # note that indvar1,indvar2.. cols can be different values for different depvar matrices
+    # order follows pinnrep.depvars orders of variables (order of declaration in @variables macro)
 
     # Phi is the trial solution for each NN in chain array
     # Creating logpdf( MvNormal(Phi(t,θ),std), dataset[i] )
@@ -88,8 +90,6 @@ function priorlogpdf(ltd::PDELogTargetDensity, θ)
     invlogpdf = sum((length(θ) - ltd.extraparams + 1):length(θ)) do i
         logpdf(invpriors[length(θ) - i + 1], θ[i])
     end
-
-    return invlogpdf + logpdf(nnwparams, θ[1:(length(θ) - ltd.extraparams)])
 end
 
 function integratorchoice(Integratorkwargs, initial_ϵ)
@@ -243,6 +243,30 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
     pinnrep = symbolic_discretize(pde_system, discretization)
     dataset_pde, dataset_bc = discretization.dataset
 
+    eqs = pinnrep.eqs
+    yuh1 = get_loss_2(pinnrep, dataset_pde, eqs)
+    eqs = pinnrep.bcs
+    yuh2 = get_loss_2(pinnrep, dataset_bc, eqs)
+
+    pde_loss_functions, bc_loss_functions = merge_dataset_with_loss_function(pinnrep,
+        dataset,
+        yuh1,
+        yuh2)
+
+    function L2_loss2(θ, allstd)
+        stdpdes, stdbcs, stdextra = allstd
+        pde_loglikelihoods = [logpdf(Normal(0, stdpdes[i]), pde_loss_function(θ))
+                              for (i, pde_loss_function) in enumerate(pde_loss_functions)]
+
+        bc_loglikelihoods = [logpdf(Normal(0, stdbcs[j]), bc_loss_function(θ))
+                             for (j, bc_loss_function) in enumerate(bc_loss_functions)]
+        println("pde_loglikelihoods : ", pde_loglikelihoods)
+        println("bc_loglikelihoods : ", bc_loglikelihoods)
+        return sum(sum(pde_loglikelihoods) + sum(bc_loglikelihoods))
+    end
+
+    println(L2_loss2)
+    # WIP split dataset to respective equations
     if ((dataset_bc isa Nothing) && (dataset_pde isa Nothing))
         dataset = nothing
     elseif dataset_bc isa Nothing
diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
new file mode 100644
index 0000000000..4876328413
--- /dev/null
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -0,0 +1,344 @@
+using Test, MCMCChains, Lux, ModelingToolkit
+import ModelingToolkit: Interval, infimum, supremum
+using ForwardDiff, Distributions, OrdinaryDiffEq
+using Flux, AdvancedHMC, Statistics, Random, Functors
+using NeuralPDE, MonteCarloMeasurements
+using ComponentArrays, ModelingToolkit
+
+Random.seed!(100)
+
+# Cos(pit) periodic curve (Parameter Estimation)
+println("Example 1, 2d Periodic System")
+@parameters t, p
+@variables u(..)
+
+Dt = Differential(t)
+eqs = Dt(u(t)) - cos(p * t) ~ 0
+bcs = [u(0) ~ 0.0]
+domains = [t ∈ Interval(0.0, 2.0)]
+
+chainf = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 1)) |> Flux.f64
+init1, re1 = Flux.destructure(chainf)
+chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
+initl, st = Lux.setup(Random.default_rng(), chainl)
+
+@named pde_system = PDESystem(eqs,
+    bcs,
+    domains,
+    [t],
+    [u(t)],
+    [p],
+    defaults = Dict([p => 4.0]))
+
+analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
+timepoints = collect(0.0:(1 / 100.0):2.0)
+u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
+u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
+dataset = [hcat(u1, timepoints)]
+
+# plot(dataset[1][:, 2], dataset[1][:, 1])
+# plot!(timepoints, u)
+
+# checking all training strategies
+discretization = NeuralPDE.BayesianPINN([chainl],
+    StochasticTraining(200),
+    param_estim = true, dataset = [dataset, nothing])
+
+ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 1500,
+    bcstd = [0.05],
+    phystd = [0.01], l2std = [0.01],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 50.0],
+    param = [LogNormal(6.0, 0.5)], progress = true)
+
+discretization = NeuralPDE.BayesianPINN([chainl],
+    QuasiRandomTraining(200),
+    param_estim = true, dataset = [dataset, nothing])
+
+ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 1500,
+    bcstd = [0.05],
+    phystd = [0.01], l2std = [0.01],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 50.0],
+    param = [LogNormal(6.0, 0.5)])
+
+discretization = NeuralPDE.BayesianPINN([chainl],
+    QuadratureTraining(), param_estim = true, dataset = [dataset, nothing])
+
+ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 1500,
+    bcstd = [0.05],
+    phystd = [0.01], l2std = [0.01],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 50.0],
+    param = [LogNormal(6.0, 0.5)])
+
+discretization = NeuralPDE.BayesianPINN([chainl],
+    GridTraining([0.02]),
+    param_estim = true, dataset = [dataset, nothing])
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 1500,
+    bcstd = [0.05],
+    phystd = [0.01], l2std = [0.01],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 50.0],
+    param = [LogNormal(6.0, 0.5)])
+
+discretization = NeuralPDE.BayesianPINN([chainf],
+    GridTraining([0.02]), param_estim = true, dataset = [dataset, nothing])
+
+sol2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 1500,
+    bcstd = [0.03],
+    phystd = [0.01], l2std = [0.01],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 50.0],
+    param = [LogNormal(6.0, 0.5)])
+
+param = 2 * π
+ts = vec(sol1.timepoints[1])
+u_real = [analytic_sol_func1(0.0, t) for t in ts]
+u_predict = pmean(sol1.ensemblesol[1])
+
+@test u_predict≈u_real atol=1.5
+@test mean(u_predict .- u_real) < 0.1
+@test sol1.estimated_de_params[1]≈param atol=param * 0.3
+
+ts = vec(sol2.timepoints[1])
+u_real = [analytic_sol_func1(0.0, t) for t in ts]
+u_predict = pmean(sol2.ensemblesol[1])
+
+@test u_predict≈u_real atol=0.5
+@test mean(u_predict .- u_real) < 0.1
+@test sol2.estimated_de_params[1]≈param atol=param * 0.3
+
+## Example Lorenz System (Parameter Estimation)
+println("Example 2, Lorenz System")
+@parameters t, σ_
+@variables x(..), y(..), z(..)
+Dt = Differential(t)
+eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
+    Dt(y(t)) ~ x(t) * (28.0 - z(t)) - y(t),
+    Dt(z(t)) ~ x(t) * y(t) - 8 / 3 * z(t)]
+
+bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
+domains = [t ∈ Interval(0.0, 1.0)]
+
+input_ = length(domains)
+n = 7
+chain = [
+    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+        Lux.Dense(n, 1)),
+    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+        Lux.Dense(n, 1)),
+    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+        Lux.Dense(n, 1)),
+]
+
+#Generate Data
+function lorenz!(du, u, p, t)
+    du[1] = 10.0 * (u[2] - u[1])
+    du[2] = u[1] * (28.0 - u[3]) - u[2]
+    du[3] = u[1] * u[2] - (8 / 3) * u[3]
+end
+
+u0 = [1.0; 0.0; 0.0]
+tspan = (0.0, 1.0)
+prob = ODEProblem(lorenz!, u0, tspan)
+sol = solve(prob, Tsit5(), dt = 0.01, saveat = 0.05)
+ts = sol.t
+us = hcat(sol.u...)
+us = us .+ ((0.05 .* randn(size(us))) .* us)
+ts_ = hcat(sol(ts).t...)[1, :]
+dataset = [hcat(us[i, :], ts_) for i in 1:3]
+
+# using Plots, StatsPlots
+# plot(hcat(sol.u...)[1, :], hcat(sol.u...)[2, :], hcat(sol.u...)[3, :])
+# plot!(dataset[1][:, 1], dataset[2][:, 1], dataset[3][:, 1])
+# plot(dataset[1][:, 2:end], dataset[1][:, 1])
+# plot!(dataset[2][:, 2:end], dataset[2][:, 1])
+# plot!(dataset[3][:, 2:end], dataset[3][:, 1])
+
+discretization = NeuralPDE.BayesianPINN(chain, NeuralPDE.GridTraining([0.01]);
+    param_estim = true, dataset = [dataset, nothing])
+
+@named pde_system = PDESystem(eqs, bcs, domains,
+    [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]]))
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 20,
+    bcstd = [0.3, 0.3, 0.3],
+    phystd = [0.1, 0.1, 0.1],
+    l2std = [1, 1, 1],
+    priorsNNw = (0.0, 1.0),
+    saveats = [0.01],
+    param = [Normal(12.0, 2)])
+
+idealp = 10.0
+p_ = sol1.estimated_de_params[1]
+
+# plot(pmean(sol1.ensemblesol[1]), pmean(sol1.ensemblesol[2]), pmean(sol1.ensemblesol[3]))
+# plot(sol1.timepoints[1]', pmean(sol1.ensemblesol[1]))
+# plot!(sol1.timepoints[2]', pmean(sol1.ensemblesol[2]))
+# plot!(sol1.timepoints[3]', pmean(sol1.ensemblesol[3]))
+
+@test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
+# @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
+
+# # NEW LOSS FUNCTION CODE
+# pinnrep = symbolic_discretize(pde_system, discretization)
+
+# # general equation with diff
+# # now 1> substute u(t), phi(t) values from dataset and get multiple equations
+# # phi[i] must be in numeric_derivative() form
+# # derivative(phi, u, [x, y], εs, order, θ) - use parse_equations() and interp object to create loss function
+# # this function must take interp objects(train sets)
+# # dataset - get u(t), t from dataset interpolations object
+# # make lhs-rhs loss
+# # sum losses
+
+# using DataInterpolations
+
+# # dataset_pde has normal matrix format 
+# # dataset_bc has format of Vector{typeof(dataset_pde )} as each bc has different domain requirements
+# function get_symbols(dict_depvar_input, dataset, depvars)
+#     # get datasets into splattable form
+#     splat_form = [[dataset_i[:, i] for i in 1:size(dataset_i)[2]] for dataset_i in dataset]
+#     # splat datasets onto Linear interpolations tables
+#     interps = [LinearInterpolation(splat_i...) for splat_i in splat_form]
+#     interps = Dict(depvars .=> interps)
+
+#     Dict_symbol_interps = Dict(depvar => (interps[depvar], dict_depvar_input[depvar])
+#                                for depvar in depvars)
+
+#     tobe_subs = Dict()
+#     for (a, b) in dict_depvar_input
+#         tobe_subs[a] = eval(:($a($(b...))))
+#     end
+
+#     to_subs = Dict()
+#     for (a, b) in Dict_symbol_interps
+#         b1, b2 = b
+#         to_subs[a] = eval(:($b1($(b2...))))
+#     end
+#     return to_subs, tobe_subs
+# end
+
+# function recur_expression(exp, Dict_differentials)
+#     for in_exp in exp.args
+#         if !(in_exp isa Expr)
+#             # skip +,== symbols, characters etc
+#             continue
+
+#         elseif in_exp.args[1] isa ModelingToolkit.Differential
+#             # first symbol of differential term
+#             # Dict_differentials for masking differential terms
+#             # and resubstituting differentials in equations after putting in interpolations
+#             Dict_differentials[eval(in_exp)] = Symbol("diff_$(length(Dict_differentials)+1)")
+#             return
+
+#         else
+#             recur_expression(in_exp, Dict_differentials)
+#         end
+#     end
+# end
+
+# # get datafree loss functions for new loss type
+# # need to call merge_strategy_with_loss_function() variant after this
+# function merge_dataset_with_loss_function(pinnrep::NeuralPDE.PINNRepresentation,
+#         dataset,
+#         datafree_pde_loss_function,
+#         datafree_bc_loss_function)
+#     @unpack domains, eqs, bcs, dict_indvars, dict_depvars, flat_init_params = pinnrep
+
+#     eltypeθ = eltype(pinnrep.flat_init_params)
+
+#     train_sets = [[dataset[i][:, 2] for i in eachindex(dataset)], [[0;;], [0;;], [0;;]]]
+
+#     # the points in the domain and on the boundary
+#     pde_train_sets, bcs_train_sets = train_sets
+#     # pde_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)),
+#     #     pde_train_sets)
+#     # bcs_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)),
+#     #     bcs_train_sets)
+#     pde_loss_functions = [get_loss_function(_loss, _set, eltypeθ)
+#                           for (_loss, _set) in zip(datafree_pde_loss_function,
+#         pde_train_sets)]
+
+#     bc_loss_functions = [get_loss_function(_loss, _set, eltypeθ)
+#                          for (_loss, _set) in zip(datafree_bc_loss_function, bcs_train_sets)]
+
+#     pde_loss_functions, bc_loss_functions
+# end
+
+# function get_loss_function(loss_function, train_set, eltypeθ; τ = nothing)
+#     loss = (θ) -> mean(abs2, loss_function(train_set, θ))
+# end
+
+# # for bc case, [bc]/bc eqs must be passed along with dataset_bc[i]
+# # and final loss for bc must be together in a vector(bcs has seperate type of dataset_bc)
+# # eqs is vector of pde eqs and dataset here is dataset_pde
+# # normally you get vector of losses
+# function get_loss_2(pinnrep, dataset, eqs)
+#     depvars = pinnrep.depvars # order is same as dataset and interps
+#     dict_depvar_input = pinnrep.dict_depvar_input
+
+#     to_subs, tobe_subs = get_symbols(dict_depvar_input, dataset, depvars)
+#     interp_subs_dict = Dict(tobe_subs[depvar] => to_subs[depvar] for depvar in depvars)
+
+#     Dict_differentials = Dict()
+#     exp = toexpr(eqs)
+#     void_value = [recur_expression(exp_i, Dict_differentials) for exp_i in exp]
+#     # Dict_differentials is now filled with Differential operator => diff_i key-value pairs
+
+#     # masking operation
+#     a = substitute.(eqs, Ref(Dict_differentials))
+#     b = substitute.(a, Ref(interp_subs_dict))
+#     # reverse dict for re-substituing values of Differential(t)(u(t)) etc
+#     rev_Dict_differentials = Dict(value => key for (key, value) in Dict_differentials)
+#     eqs = substitute.(b, Ref(rev_Dict_differentials))
+#     # get losses
+#     loss_functions = [NeuralPDE.build_loss_function(pinnrep,
+#         eqs[i],
+#         pinnrep.pde_indvars[i]) for i in eachindex(eqs)]
+# end
+
+# eqs = pde_system.eqs
+# yuh1 = get_loss_2(pinnrep, dataset, eqs)
+# eqs = pinnrep.bcs
+# yuh2 = get_loss_2(pinnrep, dataset, eqs)
+
+# pde_loss_functions, bc_loss_functions = merge_dataset_with_loss_function(pinnrep,
+#     dataset,
+#     yuh1,
+#     yuh2)
+
+# pde_loss_functions()
+# # logic for recursion formula to parse differentials
+# # # this below has the whole differential term
+# # toexpr(pde_system.eqs[1]).args[2].args[3].args[3] isa ModelingToolkit.Differential
+# # toexpr(pde_system.eqs[1]).args[2].args[3].args[3]
+# # # .args[1] isa ModelingToolkit.Differential
+
+# # logic for interpolation and indvars splatting to get Equation parsing terms
+# # splat_form = [[dataset_i[:, i] for i in 1:size(dataset_i)[2]] for dataset_i in dataset]
+# # # splat datasets onto Linear interpolations tables
+# # interps = [LinearInterpolation(splat_i...) for splat_i in splat_form]
+# # interps = Dict(depvars .=> interps)
+# # get datasets into splattable form
+# # splat_form = [[dataset_i[:, i] for i in 1:size(dataset_i)[2]] for dataset_i in dataset]
+# # # splat datasets onto Linear interpolations tables
+# # yu = [LinearInterpolation(splat_i...) for splat_i in splat_form]
+# # Symbol(:($(yu[1])))
+
+# # logic to contrauct dict to feed for masking
+# # Dict(interps[depvar] => dict_depvar_input[depvar] for depvar in depvars)
\ No newline at end of file

From 2339a1674de3d5dbc67ac0fe2d9f2d5b67242bc6 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Sun, 4 Feb 2024 01:43:13 +0530
Subject: [PATCH 064/107] removed duplicate methods

---
 src/PDE_BPINN.jl | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/src/PDE_BPINN.jl b/src/PDE_BPINN.jl
index 81d0abaac8..ae5c9d98c8 100644
--- a/src/PDE_BPINN.jl
+++ b/src/PDE_BPINN.jl
@@ -177,6 +177,27 @@ function inference(samples, pinnrep, saveats, numensemble, ℓπ)
     return ensemblecurves, estimatedLuxparams, estimated_params, timepoints
 end
 
+function integratorchoice(Integratorkwargs, initial_ϵ)
+    Integrator = Integratorkwargs[:Integrator]
+    if Integrator == JitteredLeapfrog
+        jitter_rate = Integratorkwargs[:jitter_rate]
+        Integrator(initial_ϵ, jitter_rate)
+    elseif Integrator == TemperedLeapfrog
+        tempering_rate = Integratorkwargs[:tempering_rate]
+        Integrator(initial_ϵ, tempering_rate)
+    else
+        Integrator(initial_ϵ)
+    end
+end
+
+function adaptorchoice(Adaptor, mma, ssa)
+    if Adaptor != AdvancedHMC.NoAdaptation()
+        Adaptor(mma, ssa)
+    else
+        AdvancedHMC.NoAdaptation()
+    end
+end
+
 """
     ahmc_bayesian_pinn_pde(pde_system, discretization;
         draw_samples = 1000, bcstd = [0.01], l2std = [0.05], phystd = [0.05],

From dc9be780afc0c1b5b73a258cc0abfb6798160e5c Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Sun, 4 Feb 2024 02:02:02 +0530
Subject: [PATCH 065/107] update BPINN_PDE_tests.jl

---
 test/BPINN_PDE_tests.jl | 128 ----------------------------------------
 1 file changed, 128 deletions(-)

diff --git a/test/BPINN_PDE_tests.jl b/test/BPINN_PDE_tests.jl
index 6a768533d4..1388722ce4 100644
--- a/test/BPINN_PDE_tests.jl
+++ b/test/BPINN_PDE_tests.jl
@@ -223,131 +223,3 @@ end
 
     @test u_predict≈u_real atol=0.8
 end
-
-@testitem "BPINN PDE Inv I: 1D Periodic System" tags=[:pdebpinn] begin
-    using MCMCChains, Lux, ModelingToolkit, Distributions, OrdinaryDiffEq,
-          AdvancedHMC, Statistics, Random, Functors, NeuralPDE, MonteCarloMeasurements,
-          ComponentArrays
-    import ModelingToolkit: Interval, infimum, supremum
-
-    Random.seed!(100)
-
-    @parameters t p
-    @variables u(..)
-
-    Dt = Differential(t)
-    eqs = Dt(u(t)) - cos(p * t) ~ 0
-    bcs = [u(0) ~ 0.0]
-    domains = [t ∈ Interval(0.0, 2.0)]
-
-    chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
-    initl, st = Lux.setup(Random.default_rng(), chainl)
-
-    @named pde_system = PDESystem(eqs,
-        bcs,
-        domains,
-        [t],
-        [u(t)],
-        [p],
-        defaults = Dict([p => 4.0]))
-
-    analytic_sol_func1(u0, t) = u0 + sinpi(2t) / (2π)
-    timepoints = collect(0.0:(1 / 100.0):2.0)
-    u = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
-    u = u .+ (u .* 0.2) .* randn(size(u))
-    dataset = [hcat(u, timepoints)]
-
-    @testset "$(nameof(typeof(strategy)))" for strategy in [
-        StochasticTraining(200),
-        QuasiRandomTraining(200),
-        GridTraining([0.02])
-    ]
-        discretization = BayesianPINN([chainl], strategy; param_estim = true,
-            dataset = [dataset, nothing])
-
-        sol1 = ahmc_bayesian_pinn_pde(pde_system,
-            discretization;
-            draw_samples = 1500,
-            bcstd = [0.05],
-            phystd = [0.01], l2std = [0.01],
-            priorsNNw = (0.0, 1.0),
-            saveats = [1 / 50.0],
-            param = [LogNormal(6.0, 0.5)])
-
-        param = 2 * π
-        ts = vec(sol1.timepoints[1])
-        u_real = [analytic_sol_func1(0.0, t) for t in ts]
-        u_predict = pmean(sol1.ensemblesol[1])
-
-        @test u_predict≈u_real atol=1.5
-        @test mean(u_predict .- u_real) < 0.1
-        @test sol1.estimated_de_params[1]≈param atol=param * 0.3
-    end
-end
-
-@testitem "BPINN PDE Inv II: Lorenz System" tags=[:pdebpinn] begin
-    using MCMCChains, Lux, ModelingToolkit, Distributions, OrdinaryDiffEq,
-          AdvancedHMC, Statistics, Random, Functors, NeuralPDE, MonteCarloMeasurements,
-          ComponentArrays
-    import ModelingToolkit: Interval, infimum, supremum
-
-    Random.seed!(100)
-
-    @parameters t, σ_
-    @variables x(..), y(..), z(..)
-    Dt = Differential(t)
-    eqs = [
-        Dt(x(t)) ~ σ_ * (y(t) - x(t)),
-        Dt(y(t)) ~ x(t) * (28.0 - z(t)) - y(t),
-        Dt(z(t)) ~ x(t) * y(t) - 8.0 / 3.0 * z(t)
-    ]
-
-    bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
-    domains = [t ∈ Interval(0.0, 1.0)]
-
-    input_ = length(domains)
-    n = 7
-    chain = [
-        Chain(Dense(input_, n, tanh), Dense(n, n, tanh), Dense(n, 1)),
-        Chain(Dense(input_, n, tanh), Dense(n, n, tanh), Dense(n, 1)),
-        Chain(Dense(input_, n, tanh), Dense(n, n, tanh), Dense(n, 1))
-    ]
-
-    # Generate Data
-    function lorenz!(du, u, p, t)
-        du[1] = 10.0 * (u[2] - u[1])
-        du[2] = u[1] * (28.0 - u[3]) - u[2]
-        du[3] = u[1] * u[2] - (8.0 / 3.0) * u[3]
-    end
-
-    u0 = [1.0; 0.0; 0.0]
-    tspan = (0.0, 1.0)
-    prob = ODEProblem(lorenz!, u0, tspan)
-    sol = solve(prob, Tsit5(), dt = 0.01, saveat = 0.05)
-    ts = sol.t
-    us = hcat(sol.u...)
-    us = us .+ ((0.05 .* randn(size(us))) .* us)
-    ts_ = hcat(sol(ts).t...)[1, :]
-    dataset = [hcat(us[i, :], ts_) for i in 1:3]
-
-    discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true,
-        dataset = [dataset, nothing])
-
-    @named pde_system = PDESystem(eqs, bcs, domains,
-        [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]]))
-
-    sol1 = ahmc_bayesian_pinn_pde(pde_system,
-        discretization;
-        draw_samples = 50,
-        bcstd = [0.3, 0.3, 0.3],
-        phystd = [0.1, 0.1, 0.1],
-        l2std = [1, 1, 1],
-        priorsNNw = (0.0, 1.0),
-        saveats = [0.01],
-        param = [Normal(12.0, 2)])
-
-    idealp = 10.0
-    p_ = sol1.estimated_de_params[1]
-    @test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
-    # @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
-end

From 12ada6f0ce31b25873dbaf3f91c17b6488ae2672 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Sun, 4 Feb 2024 02:03:46 +0530
Subject: [PATCH 066/107] update BPINN_PDE_tests.jl

---
 test/BPINN_PDE_tests.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/BPINN_PDE_tests.jl b/test/BPINN_PDE_tests.jl
index 1388722ce4..5589039584 100644
--- a/test/BPINN_PDE_tests.jl
+++ b/test/BPINN_PDE_tests.jl
@@ -222,4 +222,4 @@ end
     u_predict = pmean(sol.ensemblesol[1])
 
     @test u_predict≈u_real atol=0.8
-end
+end
\ No newline at end of file

From 8c1ea3f40f57eab58f9520586fd31f8ce396c69f Mon Sep 17 00:00:00 2001
From: Vaibhav Dixit <vaibhavyashdixit@gmail.com>
Date: Thu, 19 Oct 2023 17:48:04 -0400
Subject: [PATCH 067/107] Put new loglikelihood behind a conditional

---
 src/BPINN_ode.jl                 | 229 ---------
 src/advancedHMC_MCMC.jl          | 807 -------------------------------
 src/bayesian/BPINN_ode.jl        |   0
 src/bayesian/advancedHMC_MCMC.jl |   0
 src/bayesian/collocated_estim.jl | 194 ++++++++
 test/bpinnexperimental.jl        |  66 +++
 6 files changed, 260 insertions(+), 1036 deletions(-)
 delete mode 100644 src/BPINN_ode.jl
 delete mode 100644 src/advancedHMC_MCMC.jl
 create mode 100644 src/bayesian/BPINN_ode.jl
 create mode 100644 src/bayesian/advancedHMC_MCMC.jl
 create mode 100644 src/bayesian/collocated_estim.jl
 create mode 100644 test/bpinnexperimental.jl

diff --git a/src/BPINN_ode.jl b/src/BPINN_ode.jl
deleted file mode 100644
index f65f1d659e..0000000000
--- a/src/BPINN_ode.jl
+++ /dev/null
@@ -1,229 +0,0 @@
-# HIGH level API for BPINN ODE solver
-
-"""
-    BNNODE(chain, kernel = HMC; strategy = nothing, draw_samples = 2000,
-           priorsNNw = (0.0, 2.0), param = [nothing], l2std = [0.05],
-           phystd = [0.05], dataset = [nothing], physdt = 1 / 20.0,
-           MCMCargs = (; n_leapfrog=30), nchains = 1, init_params = nothing,
-           Adaptorkwargs = (; Adaptor = StanHMCAdaptor, targetacceptancerate = 0.8,
-                              Metric = DiagEuclideanMetric),
-           Integratorkwargs = (Integrator = Leapfrog,), autodiff = false,
-           progress = false, verbose = false)
-
-Algorithm for solving ordinary differential equations using a Bayesian neural network. This
-is a specialization of the physics-informed neural network which is used as a solver for a
-standard `ODEProblem`.
-
-!!! warn
-
-    Note that BNNODE only supports ODEs which are written in the out-of-place form, i.e.
-    `du = f(u,p,t)`, and not `f(du,u,p,t)`. If not declared out-of-place, then the BNNODE
-    will exit with an error.
-
-## Positional Arguments
-
-* `chain`: A neural network architecture, defined as a `Lux.AbstractLuxLayer`.
-* `kernel`: Choice of MCMC Sampling Algorithm. Defaults to `AdvancedHMC.HMC`
-
-## Keyword Arguments
-
-(refer `NeuralPDE.ahmc_bayesian_pinn_ode` keyword arguments.)
-
-## Example
-
-```julia
-linear = (u, p, t) -> -u / p[1] + exp(t / p[2]) * cos(t)
-tspan = (0.0, 10.0)
-u0 = 0.0
-p = [5.0, -5.0]
-prob = ODEProblem(linear, u0, tspan, p)
-linear_analytic = (u0, p, t) -> exp(-t / 5) * (u0 + sin(t))
-
-sol = solve(prob, Tsit5(); saveat = 0.05)
-u = sol.u[1:100]
-time = sol.t[1:100]
-x̂ = u .+ (u .* 0.2) .* randn(size(u))
-dataset = [x̂, time]
-
-chainlux = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh), Lux.Dense(6, 1))
-
-alg = BNNODE(chainlux; draw_samples = 2000, l2std = [0.05], phystd = [0.05],
-             priorsNNw = (0.0, 3.0), progress = true)
-
-sol_lux = solve(prob, alg)
-
-# with parameter estimation
-alg = BNNODE(chainlux; dataset, draw_samples = 2000, l2std = [0.05], phystd = [0.05],
-             priorsNNw = (0.0, 10.0), param = [Normal(6.5, 0.5), Normal(-3, 0.5)],
-             progress = true)
-
-sol_lux_pestim = solve(prob, alg)
-```
-
-## Solution Notes
-
-Note that the solution is evaluated at fixed time points according to the strategy chosen.
-ensemble solution is evaluated and given at steps of `saveat`.
-Dataset should only be provided when ODE parameter Estimation is being done.
-The neural network is a fully continuous solution so `BPINNsolution`
-is an accurate interpolation (up to the neural network training result). In addition, the
-`BPINNstats` is returned as `sol.fullsolution` for further analysis.
-
-## References
-
-Liu Yanga, Xuhui Menga, George Em Karniadakis. "B-PINNs: Bayesian Physics-Informed Neural
-Networks for Forward and Inverse PDE Problems with Noisy Data".
-
-Kevin Linka, Amelie Schäfer, Xuhui Meng, Zongren Zou, George Em Karniadakis, Ellen Kuhl
-"Bayesian Physics Informed Neural Networks for real-world nonlinear dynamical systems".
-"""
-@concrete struct BNNODE <: NeuralPDEAlgorithm
-    chain <: AbstractLuxLayer
-    kernel
-    strategy <: Union{Nothing, AbstractTrainingStrategy}
-    draw_samples::Int
-    priorsNNw::Tuple{Float64, Float64}
-    param <: Union{Nothing, Vector{<:Distribution}}
-    l2std::Vector{Float64}
-    phystd::Vector{Float64}
-    dataset <: Union{Vector{Nothing}, Vector{<:Vector{<:AbstractFloat}}}
-    physdt::Float64
-    MCMCkwargs <: NamedTuple
-    nchains::Int
-    init_params <: Union{Nothing, <:NamedTuple, Vector{<:AbstractFloat}}
-    Adaptorkwargs <: NamedTuple
-    Integratorkwargs <: NamedTuple
-    numensemble::Int
-    estim_collocate::Bool
-    autodiff::Bool
-    progress::Bool
-    verbose::Bool
-end
-
-function BNNODE(chain, kernel = HMC; strategy = nothing, draw_samples = 2000,
-        priorsNNw = (0.0, 2.0), param = nothing, l2std = [0.05], phystd = [0.05],
-        dataset = [nothing], physdt = 1 / 20.0, MCMCkwargs = (n_leapfrog = 30,),
-        nchains = 1, init_params = nothing,
-        Adaptorkwargs = (Adaptor = StanHMCAdaptor,
-            Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
-        Integratorkwargs = (Integrator = Leapfrog,),
-        numensemble = floor(Int, draw_samples / 3),
-        estim_collocate = false, autodiff = false, progress = false, verbose = false)
-    chain isa AbstractLuxLayer || (chain = FromFluxAdaptor()(chain))
-    return BNNODE(chain, kernel, strategy, draw_samples, priorsNNw, param, l2std, phystd,
-        dataset, physdt, MCMCkwargs, nchains, init_params, Adaptorkwargs,
-        Integratorkwargs, numensemble, estim_collocate, autodiff, progress, verbose)
-end
-
-"""
-Contains `ahmc_bayesian_pinn_ode()` function output:
-
-1. A MCMCChains.jl chain object for sampled parameters.
-2. The set of all sampled parameters.
-3. Statistics like:
-    - n_steps
-    - acceptance_rate
-    - log_density
-    - hamiltonian_energy
-    - hamiltonian_energy_error
-    - numerical_error
-    - step_size
-    - nom_step_size
-"""
-@concrete struct BPINNstats
-    mcmc_chain
-    samples
-    statistics
-end
-
-"""
-BPINN Solution contains the original solution from AdvancedHMC.jl sampling (BPINNstats
-contains fields related to that).
-
-1. `ensemblesol` is the Probabilistic Estimate (MonteCarloMeasurements.jl Particles type) of
-   Ensemble solution from All Neural Network's (made using all sampled parameters) output's.
-2. `estimated_nn_params` - Probabilistic Estimate of NN params from sampled weights, biases.
-3. `estimated_de_params` - Probabilistic Estimate of DE params from sampled unknown DE
-   parameters.
-"""
-@concrete struct BPINNsolution
-    original <: BPINNstats
-    ensemblesol
-    estimated_nn_params
-    estimated_de_params
-    timepoints
-end
-
-function SciMLBase.__solve(prob::SciMLBase.ODEProblem, alg::BNNODE, args...; dt = nothing,
-        timeseries_errors = true, save_everystep = true, adaptive = false,
-        abstol = 1.0f-6, reltol = 1.0f-3, verbose = false, saveat = 1 / 50.0,
-        maxiters = nothing, numensemble = floor(Int, alg.draw_samples / 3))
-    (; chain, param, strategy, draw_samples, numensemble, verbose) = alg
-
-    # ahmc_bayesian_pinn_ode needs param=[] for easier vcat operation for full vector of parameters
-    param = param === nothing ? [] : param
-    strategy = strategy === nothing ? GridTraining : strategy
-
-    @assert alg.draw_samples≥0 "Number of samples to be drawn has to be >=0."
-
-    mcmcchain, samples, statistics = ahmc_bayesian_pinn_ode(
-        prob, chain; strategy, alg.dataset, alg.draw_samples, alg.init_params,
-        alg.physdt, alg.l2std, alg.phystd, alg.priorsNNw, param, alg.nchains, alg.autodiff,
-        Kernel = alg.kernel, alg.Adaptorkwargs, alg.Integratorkwargs,
-        alg.MCMCkwargs, alg.progress, alg.verbose, alg.estim_collocate)
-
-    fullsolution = BPINNstats(mcmcchain, samples, statistics)
-    ninv = length(param)
-    t = collect(eltype(saveat), prob.tspan[1]:saveat:prob.tspan[2])
-
-    θinit, st = LuxCore.setup(Random.default_rng(), chain)
-    θ = [vector_to_parameters(samples[i][1:(end - ninv)], θinit)
-         for i in 1:max(draw_samples - draw_samples ÷ 10, draw_samples - 1000)]
-
-    luxar = [chain(t', θ[i], st)[1] for i in 1:numensemble]
-    # only need for size
-    θinit = collect(ComponentArray(θinit))
-
-    # constructing ensemble predictions
-    ensemblecurves = Vector{}[]
-    # check if NN output is more than 1
-    numoutput = size(luxar[1])[1]
-    if numoutput > 1
-        # Initialize a vector to store the separated outputs for each output dimension
-        output_matrices = [Vector{Vector{Float32}}() for _ in 1:numoutput]
-
-        # Loop through each element in `luxar`
-        for element in luxar
-            for i in 1:numoutput
-                push!(output_matrices[i], element[i, :])  # Append the i-th output (i-th row) to the i-th output_matrices
-            end
-        end
-
-        for r in 1:numoutput
-            ensem_r = hcat(output_matrices[r]...)'
-            ensemblecurve_r = prob.u0[r] .+
-                              [Particles(ensem_r[:, i]) for i in 1:length(t)] .*
-                              (t .- prob.tspan[1])
-            push!(ensemblecurves, ensemblecurve_r)
-        end
-
-    else
-        ensemblecurve = prob.u0 .+
-                        [Particles(reduce(vcat, luxar)[:, i]) for i in 1:length(t)] .*
-                        (t .- prob.tspan[1])
-        push!(ensemblecurves, ensemblecurve)
-    end
-
-    nnparams = length(θinit)
-    estimnnparams = [Particles(reduce(hcat, samples[(end - numensemble):end])[i, :])
-                     for i in 1:nnparams]
-
-    if ninv == 0
-        estimated_params = [nothing]
-    else
-        estimated_params = [Particles(reduce(hcat, samples[(end - numensemble):end])[i, :])
-                            for i in (nnparams + 1):(nnparams + ninv)]
-    end
-
-    return BPINNsolution(fullsolution, ensemblecurves, estimnnparams, estimated_params, t)
-end
diff --git a/src/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl
deleted file mode 100644
index 0112cbaaa9..0000000000
--- a/src/advancedHMC_MCMC.jl
+++ /dev/null
@@ -1,807 +0,0 @@
-@concrete struct LogTargetDensity
-    dim::Int
-    prob <: SciMLBase.ODEProblem
-    smodel <: StatefulLuxLayer
-    strategy <: AbstractTrainingStrategy
-    dataset <: Union{Vector{Nothing}, Vector{<:Vector{<:AbstractFloat}}}
-    priors <: Vector{<:Distribution}
-    phystd::Vector{Float64}
-    l2std::Vector{Float64}
-    autodiff::Bool
-    physdt::Float64
-    extraparams::Int
-    init_params <: Union{NamedTuple, ComponentArray}
-    estim_collocate::Bool
-end
-
-"""
-NN OUTPUT AT t,θ ~ phi(t,θ).
-"""
-function (f::LogTargetDensity)(t::AbstractVector, θ)
-    θ = vector_to_parameters(θ, f.init_params)
-    dev = safe_get_device(θ)
-    t = safe_expand(dev, t)
-    u0 = f.prob.u0 |> dev
-    return u0 .+ (t' .- f.prob.tspan[1]) .* f.smodel(t', θ)
-end
-
-(f::LogTargetDensity)(t::Number, θ) = f([t], θ)[:, 1]
-
-"""
-Similar to ode_dfdx() in NNODE.
-"""
-function ode_dfdx(phi::LogTargetDensity, t::AbstractVector, θ, autodiff::Bool)
-    if autodiff
-        return ForwardDiff.jacobian(Base.Fix2(phi, θ), t)
-    else
-        ϵ = sqrt(eps(eltype(t)))
-        return (phi(t .+ ϵ, θ) .- phi(t, θ)) ./ ϵ
-    end
-end
-
-"""
-Function needed for converting vector of sampled parameters into ComponentVector in case of Lux chain output, derivatives
-the sampled parameters are of exotic type `Dual` due to ForwardDiff's autodiff tagging.
-"""
-function vector_to_parameters(ps_new::AbstractVector, ps::Union{NamedTuple, ComponentArray})
-    @assert length(ps_new) == LuxCore.parameterlength(ps)
-    i = 1
-    function get_ps(x)
-        z = reshape(view(ps_new, i:(i + length(x) - 1)), size(x))
-        i += length(x)
-        return z
-    end
-    return fmap(get_ps, ps)
-end
-
-vector_to_parameters(ps_new::AbstractVector, _::AbstractVector) = ps_new
-
-function LogDensityProblems.logdensity(ltd::LogTargetDensity, θ)
-    ldensity = physloglikelihood(ltd, θ) + priorweights(ltd, θ) + L2LossData(ltd, θ)
-    ltd.estim_collocate && return ldensity + L2loss2(ltd, θ)
-    return ldensity
-end
-
-LogDensityProblems.dimension(ltd::LogTargetDensity) = ltd.dim
-
-function LogDensityProblems.capabilities(::LogTargetDensity)
-    return LogDensityProblems.LogDensityOrder{1}()
-end
-
-# suggested extra loss function
-function L2loss2(Tar::LogTargetDensity, θ)
-    f = Tar.prob.f
-
-    # parameter estimation chosen or not
-    if Tar.extraparams > 0
-        dataset, deri_sol = Tar.dataset
-        # deri_sol = deri_sol'
-        autodiff = Tar.autodiff
-
-        # # Timepoints to enforce Physics
-        # dataset = Array(reduce(hcat, dataset)')
-        # t = dataset[end, :]
-        # û = dataset[1:(end - 1), :]
-
-        # ode_params = Tar.extraparams == 1 ?
-        #              θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
-        #              θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
-
-        # if length(û[:, 1]) == 1
-        #     physsol = [f(û[:, i][1],
-        #         ode_params,
-        #         t[i])
-        #                for i in 1:length(û[1, :])]
-        # else
-        #     physsol = [f(û[:, i],
-        #         ode_params,
-        #         t[i])
-        #                for i in 1:length(û[1, :])]
-        # end
-        # #form of NN output matrix output dim x n
-        # deri_physsol = reduce(hcat, physsol)
-
-        # > for perfect deriv(basically gradient matching in case of an ODEFunction)
-        # in case of PDE or general ODE we would want to reduce residue of f(du,u,p,t)
-        # if length(û[:, 1]) == 1
-        #     deri_sol = [f(û[:, i][1],
-        #         Tar.prob.p,
-        #         t[i])
-        #                 for i in 1:length(û[1, :])]
-        # else
-        #     deri_sol = [f(û[:, i],
-        #         Tar.prob.p,
-        #         t[i])
-        #                 for i in 1:length(û[1, :])]
-        # end
-        # deri_sol = reduce(hcat, deri_sol) 
-        # deri_sol = reduce(hcat, derivatives)
-
-        # Timepoints to enforce Physics 
-        t = dataset[end]
-        u1 = dataset[2]
-        û = dataset[1]
-        # Tar(t, θ[1:(length(θ) - Tar.extraparams)])'
-        #  
-
-        nnsol = NNodederi(Tar, t, θ[1:(length(θ) - Tar.extraparams)], autodiff)
-
-        ode_params = Tar.extraparams == 1 ?
-                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
-                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
-
-        if length(Tar.prob.u0) == 1
-            physsol = [f(û[i],
-                ode_params,
-                t[i])
-                       for i in 1:length(û[:, 1])]
-        else
-            physsol = [f([û[i], u1[i]],
-                ode_params,
-                t[i])
-                       for i in 1:length(û[:, 1])]
-        end
-        #form of NN output matrix output dim x n 
-        deri_physsol = reduce(hcat, physsol)
-
-        # if length(Tar.prob.u0) == 1
-        #     nnsol = [f(û[i],
-        #         Tar.prob.p,
-        #         t[i])
-        #              for i in 1:length(û[:, 1])]
-        # else
-        #     nnsol = [f([û[i], u1[i]],
-        #         Tar.prob.p,
-        #         t[i])
-        #              for i in 1:length(û[:, 1])]
-        # end
-        # form of NN output matrix output dim x n
-        # nnsol = reduce(hcat, nnsol)
-
-        # > Instead of dataset gradients trying NN derivatives with dataset collocation 
-        # # convert to matrix as nnsol  
-
-        physlogprob = 0
-        for i in 1:length(Tar.prob.u0)
-            # can add phystd[i] for u[i] 
-            physlogprob += logpdf(MvNormal(deri_physsol[i, :],
-                    LinearAlgebra.Diagonal(map(abs2,
-                        (Tar.l2std[i] * 4.0) .*
-                        ones(length(nnsol[i, :]))))),
-                nnsol[i, :])
-        end
-        return physlogprob
-    else
-        return 0
-    end
-end
-
-# PDE(DU,U,P,T)=0
-
-# Derivated via Central Diff
-# function calculate_derivatives2(dataset)
-#     x̂, time = dataset
-#     num_points = length(x̂)
-#     # Initialize an array to store the derivative values.
-#     derivatives = similar(x̂)
-
-#     for i in 2:(num_points - 1)
-#         # Calculate the first-order derivative using central differences.
-#         Δt_forward = time[i + 1] - time[i]
-#         Δt_backward = time[i] - time[i - 1]
-
-#         derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
-
-#         derivatives[i] = derivative
-#     end
-
-#     # Derivatives at the endpoints can be calculated using forward or backward differences.
-#     derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
-#     derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
-#     return derivatives
-# end
-
-function calderivatives(prob, dataset)
-    chainflux = Flux.Chain(Flux.Dense(1, 8, tanh), Flux.Dense(8, 8, tanh),
-        Flux.Dense(8, 2)) |> Flux.f64
-    # chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64
-    function loss(x, y)
-        # sum(Flux.mse.(prob.u0[1] .+ (prob.tspan[2] .- x)' .* chainflux(x)[1, :], y[1]) +
-        #     Flux.mse.(prob.u0[2] .+ (prob.tspan[2] .- x)' .* chainflux(x)[2, :], y[2]))
-        # sum(Flux.mse.(prob.u0[1] .+ (prob.tspan[2] .- x)' .* chainflux(x)[1, :], y[1]))
-        sum(Flux.mse.(chainflux(x), y))
-    end
-    optimizer = Flux.Optimise.ADAM(0.01)
-    epochs = 3000
-    for epoch in 1:epochs
-        Flux.train!(loss,
-            Flux.params(chainflux),
-            [(dataset[end]', dataset[1:(end - 1)])],
-            optimizer)
-    end
-
-    # A1 = (prob.u0' .+
-    #   (prob.tspan[2] .- (dataset[end]' .+ sqrt(eps(eltype(Float64)))))' .*
-    #   chainflux(dataset[end]' .+ sqrt(eps(eltype(Float64))))')
-
-    # A2 = (prob.u0' .+
-    #       (prob.tspan[2] .- (dataset[end]'))' .*
-    #       chainflux(dataset[end]')')
-
-    A1 = chainflux(dataset[end]' .+ sqrt(eps(eltype(dataset[end][1]))))
-    A2 = chainflux(dataset[end]')
-
-    gradients = (A2 .- A1) ./ sqrt(eps(eltype(dataset[end][1])))
-
-    return gradients
-end
-
-
-# suggested extra loss function
-function L2loss2(Tar::LogTargetDensity, θ)
-    f = Tar.prob.f
-
-    # parameter estimation chosen or not
-    if Tar.extraparams > 0
-        dataset, deri_sol = Tar.dataset
-        # deri_sol = deri_sol'
-        autodiff = Tar.autodiff
-
-        # # Timepoints to enforce Physics
-        # dataset = Array(reduce(hcat, dataset)')
-        # t = dataset[end, :]
-        # û = dataset[1:(end - 1), :]
-
-        # ode_params = Tar.extraparams == 1 ?
-        #              θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
-        #              θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
-
-        # if length(û[:, 1]) == 1
-        #     physsol = [f(û[:, i][1],
-        #         ode_params,
-        #         t[i])
-        #                for i in 1:length(û[1, :])]
-        # else
-        #     physsol = [f(û[:, i],
-        #         ode_params,
-        #         t[i])
-        #                for i in 1:length(û[1, :])]
-        # end
-        # #form of NN output matrix output dim x n
-        # deri_physsol = reduce(hcat, physsol)
-
-        # > for perfect deriv(basically gradient matching in case of an ODEFunction)
-        # in case of PDE or general ODE we would want to reduce residue of f(du,u,p,t)
-        # if length(û[:, 1]) == 1
-        #     deri_sol = [f(û[:, i][1],
-        #         Tar.prob.p,
-        #         t[i])
-        #                 for i in 1:length(û[1, :])]
-        # else
-        #     deri_sol = [f(û[:, i],
-        #         Tar.prob.p,
-        #         t[i])
-        #                 for i in 1:length(û[1, :])]
-        # end
-        # deri_sol = reduce(hcat, deri_sol) 
-        # deri_sol = reduce(hcat, derivatives)
-
-        # Timepoints to enforce Physics 
-        t = dataset[end]
-        u1 = dataset[2]
-        û = dataset[1]
-        # Tar(t, θ[1:(length(θ) - Tar.extraparams)])'
-        #  
-
-        nnsol = NNodederi(Tar, t, θ[1:(length(θ) - Tar.extraparams)], autodiff)
-
-        ode_params = Tar.extraparams == 1 ?
-                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
-                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
-
-        if length(Tar.prob.u0) == 1
-            physsol = [f(û[i],
-                ode_params,
-                t[i])
-                       for i in 1:length(û[:, 1])]
-        else
-            physsol = [f([û[i], u1[i]],
-                ode_params,
-                t[i])
-                       for i in 1:length(û[:, 1])]
-        end
-        #form of NN output matrix output dim x n 
-        deri_physsol = reduce(hcat, physsol)
-
-        # if length(Tar.prob.u0) == 1
-        #     nnsol = [f(û[i],
-        #         Tar.prob.p,
-        #         t[i])
-        #              for i in 1:length(û[:, 1])]
-        # else
-        #     nnsol = [f([û[i], u1[i]],
-        #         Tar.prob.p,
-        #         t[i])
-        #              for i in 1:length(û[:, 1])]
-        # end
-        # form of NN output matrix output dim x n
-        # nnsol = reduce(hcat, nnsol)
-
-        # > Instead of dataset gradients trying NN derivatives with dataset collocation 
-        # # convert to matrix as nnsol  
-
-        physlogprob = 0
-        for i in 1:length(Tar.prob.u0)
-            # can add phystd[i] for u[i] 
-            physlogprob += logpdf(MvNormal(deri_physsol[i, :],
-                    LinearAlgebra.Diagonal(map(abs2,
-                        (Tar.l2std[i] * 4.0) .*
-                        ones(length(nnsol[i, :]))))),
-                nnsol[i, :])
-        end
-        return physlogprob
-    else
-        return 0
-    end
-end
-
-
-function calculate_derivatives(dataset)
-
-    # u = dataset[1]
-    # u1 = dataset[2]
-    # t = dataset[end]
-    # # control points
-    # n = Int(floor(length(t) / 10))
-    # # spline for datasetvalues(solution) 
-    # # interp = BSplineApprox(u, t, 4, 10, :Uniform, :Uniform)
-    # interp = CubicSpline(u, t)
-    # interp1 = CubicSpline(u1, t)
-    # # derrivatives interpolation
-    # dx = t[2] - t[1]
-    # time = collect(t[1]:dx:t[end])
-    # smoothu = [interp(i) for i in time]
-    # smoothu1 = [interp1(i) for i in time]
-    # # derivative of the spline (must match function derivative) 
-    # û = tvdiff(smoothu, 20, 0.5, dx = dx, ε = 1)
-    # û1 = tvdiff(smoothu1, 20, 0.5, dx = dx, ε = 1)
-    # # tvdiff(smoothu, 100, 0.035, dx = dx, ε = 1)
-    # # FDM
-    # # û1 = diff(u) / dx
-    # # dataset[1] and smoothu are almost equal(rounding errors)
-    # return [û, û1] 
-
-end
-
-"""
-suggested extra loss function for ODE solver case
-"""
-@views function L2loss2(ltd::LogTargetDensity, θ)
-    ltd.extraparams ≤ 0 && return false  # XXX: type-stability?
-
-    f = ltd.prob.f
-    t = ltd.dataset[end]
-    u1 = ltd.dataset[2]
-    û = ltd.dataset[1]
-
-    nnsol = ode_dfdx(ltd, t, θ[1:(length(θ) - ltd.extraparams)], ltd.autodiff)
-
-    ode_params = ltd.extraparams == 1 ? θ[((length(θ) - ltd.extraparams) + 1)] :
-                 θ[((length(θ) - ltd.extraparams) + 1):length(θ)]
-
-    physsol = if length(ltd.prob.u0) == 1
-        [f(û[i], ode_params, tᵢ) for (i, tᵢ) in enumerate(t)]
-    else
-        [f([û[i], u1[i]], ode_params, tᵢ) for (i, tᵢ) in enumerate(t)]
-    end
-    # form of NN output matrix output dim x n
-    deri_physsol = reduce(hcat, physsol)
-    T = promote_type(eltype(deri_physsol), eltype(nnsol))
-
-    physlogprob = T(0)
-    for i in 1:length(ltd.prob.u0)
-        physlogprob += logpdf(
-            MvNormal(deri_physsol[i, :],
-                Diagonal(abs2.(T(ltd.phystd[i]) .* ones(T, length(nnsol[i, :]))))),
-            nnsol[i, :]
-        )
-    end
-    return physlogprob
-end
-
-"""
-L2 loss loglikelihood(needed for ODE parameter estimation).
-"""
-@views function L2LossData(ltd::LogTargetDensity, θ)
-    (ltd.dataset isa Vector{Nothing} || ltd.extraparams == 0) && return 0
-
-    # matrix(each row corresponds to vector u's rows)
-    nn = ltd(ltd.dataset[end], θ[1:(length(θ) - ltd.extraparams)])
-    T = eltype(nn)
-
-    L2logprob = zero(T)
-    for i in 1:length(ltd.prob.u0)
-        # for u[i] ith vector must be added to dataset,nn[1, :] is the dx in lotka_volterra
-        L2logprob += logpdf(
-            MvNormal(
-                nn[i, :],
-                Diagonal(abs2.(T(ltd.l2std[i]) .* ones(T, length(ltd.dataset[i]))))
-            ),
-            ltd.dataset[i]
-        )
-    end
-    return L2logprob
-end
-
-"""
-Physics loglikelihood over problem timespan + dataset timepoints.
-"""
-function physloglikelihood(ltd::LogTargetDensity, θ)
-    (; f, p, tspan) = ltd.prob
-    (; autodiff, strategy) = ltd
-
-    # parameter estimation chosen or not
-    if ltd.extraparams > 0
-        ode_params = ltd.extraparams == 1 ? θ[((length(θ) - ltd.extraparams) + 1)] :
-                     θ[((length(θ) - ltd.extraparams) + 1):length(θ)]
-    else
-        ode_params = p isa SciMLBase.NullParameters ? Float64[] : p
-    end
-
-    return getlogpdf(strategy, ltd, f, autodiff, tspan, ode_params, θ)
-end
-
-function getlogpdf(strategy::GridTraining, ltd::LogTargetDensity, f, autodiff::Bool,
-        tspan, ode_params, θ)
-    ts = collect(eltype(strategy.dx), tspan[1]:(strategy.dx):tspan[2])
-    t = ltd.dataset isa Vector{Nothing} ? ts : vcat(ts, ltd.dataset[end])
-    return sum(innerdiff(ltd, f, autodiff, t, θ, ode_params))
-end
-
-function getlogpdf(strategy::StochasticTraining, ltd::LogTargetDensity,
-        f, autodiff::Bool, tspan, ode_params, θ)
-    T = promote_type(eltype(tspan[1]), eltype(tspan[2]))
-    samples = (tspan[2] - tspan[1]) .* rand(T, strategy.points) .+ tspan[1]
-    t = ltd.dataset isa Vector{Nothing} ? samples : vcat(samples, ltd.dataset[end])
-    return sum(innerdiff(ltd, f, autodiff, t, θ, ode_params))
-end
-
-function getlogpdf(strategy::QuadratureTraining, ltd::LogTargetDensity, f, autodiff::Bool,
-        tspan, ode_params, θ)
-    integrand(t::Number, θ) = innerdiff(ltd, f, autodiff, [t], θ, ode_params)
-    intprob = IntegralProblem(
-        integrand, (tspan[1], tspan[2]), θ; nout = length(ltd.prob.u0))
-    sol = solve(intprob, QuadGKJL(); strategy.abstol, strategy.reltol)
-    return sum(sol.u)
-end
-
-function getlogpdf(strategy::WeightedIntervalTraining, ltd::LogTargetDensity, f,
-        autodiff::Bool, tspan, ode_params, θ)
-    minT, maxT = tspan
-    weights = strategy.weights ./ sum(strategy.weights)
-    N = length(weights)
-    difference = (maxT - minT) / N
-
-    ts = eltype(difference)[]
-    for (index, item) in enumerate(weights)
-        temp_data = rand(1, trunc(Int, strategy.points * item)) .* difference .+ minT .+
-                    ((index - 1) * difference)
-        append!(ts, temp_data)
-    end
-
-    t = ltd.dataset isa Vector{Nothing} ? ts : vcat(ts, ltd.dataset[end])
-    return sum(innerdiff(ltd, f, autodiff, t, θ, ode_params))
-end
-
-"""
-MvNormal likelihood at each `ti` in time `t` for ODE collocation residue with NN with parameters θ.
-"""
-@views function innerdiff(ltd::LogTargetDensity, f, autodiff::Bool, t::AbstractVector, θ,
-        ode_params)
-    # ltd used for phi and LogTargetDensity object attributes access
-    out = ltd(t, θ[1:(length(θ) - ltd.extraparams)])
-
-    # reject samples case(write clear reason why)
-    (any(isinf, out[:, 1]) || any(isinf, ode_params)) && return convert(eltype(out), -Inf)
-
-    # this is a vector{vector{dx,dy}}(handle case single u(float passed))
-    if length(out[:, 1]) == 1
-        physsol = [f(out[:, i][1], ode_params, t[i]) for i in 1:length(out[1, :])]
-    else
-        physsol = [f(out[:, i], ode_params, t[i]) for i in 1:length(out[1, :])]
-    end
-    physsol = reduce(hcat, physsol)
-
-    nnsol = ode_dfdx(ltd, t, θ[1:(length(θ) - ltd.extraparams)], autodiff)
-
-    vals = nnsol .- physsol
-    T = eltype(vals)
-
-    # N dimensional vector if N outputs for NN(each row has logpdf of u[i] where u is vector
-    # of dependant variables)
-    return [logpdf(
-                MvNormal(vals[i, :],
-                    Diagonal(abs2.(T(ltd.phystd[i]) .* ones(T, length(vals[i, :]))))),
-                zeros(T, length(vals[i, :]))
-            ) for i in 1:length(ltd.prob.u0)]
-end
-
-"""
-Prior logpdf for NN parameters + ODE constants.
-"""
-@views function priorweights(ltd::LogTargetDensity, θ)
-    allparams = ltd.priors
-    nnwparams = allparams[1] # nn weights
-
-    ltd.extraparams ≤ 0 && return logpdf(nnwparams, θ)
-
-    # Vector of ode parameters priors
-    invpriors = allparams[2:end]
-
-    invlogpdf = sum(
-        logpdf(invpriors[length(θ) - i + 1], θ[i])
-    for i in (length(θ) - ltd.extraparams + 1):length(θ))
-
-    return invlogpdf + logpdf(nnwparams, θ[1:(length(θ) - ltd.extraparams)])
-end
-
-function generate_ltd(chain::AbstractLuxLayer, init_params)
-    return init_params, chain, LuxCore.initialstates(Random.default_rng(), chain)
-end
-
-function generate_ltd(chain::AbstractLuxLayer, ::Nothing)
-    θ, st = LuxCore.setup(Random.default_rng(), chain)
-    return θ, chain, st
-end
-
-function kernelchoice(Kernel, MCMCkwargs)
-    if Kernel == HMCDA
-        Kernel(MCMCkwargs[:δ], MCMCkwargs[:λ])
-    elseif Kernel == NUTS
-        δ, max_depth, Δ_max = MCMCkwargs[:δ], MCMCkwargs[:max_depth], MCMCkwargs[:Δ_max]
-        Kernel(δ; max_depth, Δ_max)
-    else # HMC
-        Kernel(MCMCkwargs[:n_leapfrog])
-    end
-end
-
-"""
-    ahmc_bayesian_pinn_ode(prob, chain; strategy = GridTraining, dataset = [nothing],
-                           init_params = nothing, draw_samples = 1000, physdt = 1 / 20.0f0,
-                           l2std = [0.05], phystd = [0.05], priorsNNw = (0.0, 2.0),
-                           param = [], nchains = 1, autodiff = false, Kernel = HMC,
-                           Adaptorkwargs = (Adaptor = StanHMCAdaptor,
-                               Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
-                           Integratorkwargs = (Integrator = Leapfrog,),
-                           MCMCkwargs = (n_leapfrog = 30,), progress = false,
-                           verbose = false)
-
-!!! warn
-
-    Note that `ahmc_bayesian_pinn_ode()` only supports ODEs which are written in the
-    out-of-place form, i.e. `du = f(u,p,t)`, and not `f(du,u,p,t)`. If not declared
-    out-of-place, then `ahmc_bayesian_pinn_ode()` will exit with an error.
-
-## Example
-
-```julia
-linear = (u, p, t) -> -u / p[1] + exp(t / p[2]) * cos(t)
-tspan = (0.0, 10.0)
-u0 = 0.0
-p = [5.0, -5.0]
-prob = ODEProblem(linear, u0, tspan, p)
-
-### CREATE DATASET (Necessity for accurate Parameter estimation)
-sol = solve(prob, Tsit5(); saveat = 0.05)
-u = sol.u[1:100]
-time = sol.t[1:100]
-
-### dataset and BPINN create
-x̂ = collect(Float64, Array(u) + 0.05 * randn(size(u)))
-dataset = [x̂, time]
-
-chain1 = Lux.Chain(Lux.Dense(1, 5, tanh), Lux.Dense(5, 5, tanh), Lux.Dense(5, 1)
-
-### simply solving ode here hence better to not pass dataset(uses ode params specified in prob)
-fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chain1,
-                                                            dataset = dataset,
-                                                            draw_samples = 1500,
-                                                            l2std = [0.05],
-                                                            phystd = [0.05],
-                                                            priorsNNw = (0.0,3.0))
-
-### solving ode + estimating parameters hence dataset needed to optimize parameters upon + Pior Distributions for ODE params
-fh_mcmc_chain2, fhsamples2, fhstats2 = ahmc_bayesian_pinn_ode(prob, chain1,
-                                                            dataset = dataset,
-                                                            draw_samples = 1500,
-                                                            l2std = [0.05],
-                                                            phystd = [0.05],
-                                                            priorsNNw = (0.0,3.0),
-                                                            param = [Normal(6.5,0.5), Normal(-3,0.5)])
-```
-
-## NOTES
-
-Dataset is required for accurate Parameter estimation + solving equations
-Incase you are only solving the Equations for solution, do not provide dataset
-
-## Positional Arguments
-
-* `prob`: DEProblem(out of place and the function signature should be f(u,p,t).
-* `chain`: Lux Neural Netork which would be made the Bayesian PINN.
-
-## Keyword Arguments
-
-* `strategy`: The training strategy used to choose the points for the evaluations. By
-  default GridTraining is used with given physdt discretization.
-* `init_params`: initial parameter values for BPINN (ideally for multiple chains different
-  initializations preferred)
-* `nchains`: number of chains you want to sample
-* `draw_samples`: number of samples to be drawn in the MCMC algorithms (warmup samples are
-  ~2/3 of draw samples)
-* `l2std`: standard deviation of BPINN prediction against L2 losses/Dataset
-* `phystd`: standard deviation of BPINN prediction against Chosen Underlying ODE System
-* `priorsNNw`: Tuple of (mean, std) for BPINN Network parameters. Weights and Biases of
-  BPINN are Normal Distributions by default.
-* `param`: Vector of chosen ODE parameters Distributions in case of Inverse problems.
-* `autodiff`: Boolean Value for choice of Derivative Backend(default is numerical)
-* `physdt`: Timestep for approximating ODE in it's Time domain. (1/20.0 by default)
-* `Kernel`: Choice of MCMC Sampling Algorithm (AdvancedHMC.jl implementations HMC/NUTS/HMCDA)
-* `Integratorkwargs`: `Integrator`, `jitter_rate`, `tempering_rate`.
-  Refer: https://turinglang.org/AdvancedHMC.jl/stable/
-* `Adaptorkwargs`: `Adaptor`, `Metric`, `targetacceptancerate`.
-  Refer: https://turinglang.org/AdvancedHMC.jl/stable/ Note: Target percentage (in decimal)
-  of iterations in which the proposals are accepted (0.8 by default)
-* `MCMCargs`: A NamedTuple containing all the chosen MCMC kernel's (HMC/NUTS/HMCDA)
-  Arguments, as follows :
-    * `n_leapfrog`: number of leapfrog steps for HMC
-    * `δ`: target acceptance probability for NUTS and HMCDA
-    * `λ`: target trajectory length for HMCDA
-    * `max_depth`: Maximum doubling tree depth (NUTS)
-    * `Δ_max`: Maximum divergence during doubling tree (NUTS)
-    Refer: https://turinglang.org/AdvancedHMC.jl/stable/
-* `progress`: controls whether to show the progress meter or not.
-* `verbose`: controls the verbosity. (Sample call args in AHMC)
-
-!!! warning
-
-    AdvancedHMC.jl is still developing convenience structs so might need changes on new
-    releases.
-"""
-function ahmc_bayesian_pinn_ode(
-        prob::SciMLBase.ODEProblem, chain; strategy = GridTraining, dataset = [nothing],
-        init_params = nothing, draw_samples = 1000, physdt = 1 / 20.0, l2std = [0.05],
-        phystd = [0.05], priorsNNw = (0.0, 2.0), param = [], nchains = 1, autodiff = false,
-        Kernel = HMC,
-        Adaptorkwargs = (Adaptor = StanHMCAdaptor, Metric = DiagEuclideanMetric,
-            targetacceptancerate = 0.8),
-        Integratorkwargs = (Integrator = Leapfrog,), MCMCkwargs = (n_leapfrog = 30,),
-        progress = false, verbose = false, estim_collocate = false)
-    @assert !isinplace(prob) "The BPINN ODE solver only supports out-of-place ODE definitions, i.e. du=f(u,p,t)."
-
-    chain isa AbstractLuxLayer || (chain = FromFluxAdaptor()(chain))
-
-    strategy = strategy == GridTraining ? strategy(physdt) : strategy
-
-    if dataset != [nothing] &&
-       (length(dataset) < 2 || !(dataset isa Vector{<:Vector{<:AbstractFloat}}))
-        error("Invalid dataset. dataset would be timeseries (x̂,t) where type: Vector{Vector{AbstractFloat}")
-    end
-
-    if dataset != [nothing] && param == []
-        println("Dataset is only needed for Parameter Estimation + Forward Problem, not in only Forward Problem case.")
-    elseif dataset == [nothing] && param != []
-        error("Dataset Required for Parameter Estimation.")
-    end
-
-    initial_nnθ, chain, st = generate_ltd(chain, init_params)
-
-    @assert nchains≤Threads.nthreads() "number of chains is greater than available threads"
-    @assert nchains≥1 "number of chains must be greater than 1"
-
-    # eltype(physdt) cause needs Float64 for find_good_stepsize
-    # Lux chain(using component array later as vector_to_parameter need namedtuple)
-    T = eltype(physdt)
-    initial_θ = getdata(ComponentArray{T}(initial_nnθ))
-
-    # adding ode parameter estimation
-    nparameters = length(initial_θ)
-    ninv = length(param)
-    priors = [
-        MvNormal(T(priorsNNw[1]) * ones(T, nparameters),
-        Diagonal(abs2.(T(priorsNNw[2]) .* ones(T, nparameters))))
-    ]
-
-    # append Ode params to all paramvector
-    if ninv > 0
-        # shift ode params(initialise ode params by prior means)
-        initial_θ = vcat(initial_θ, [Distributions.params(param[i])[1] for i in 1:ninv])
-        priors = vcat(priors, param)
-        nparameters += ninv
-    end
-
-    t0 = prob.tspan[1]
-    smodel = StatefulLuxLayer{true}(chain, nothing, st)
-    # dimensions would be total no of params,initial_nnθ for Lux namedTuples
-    ℓπ = LogTargetDensity(nparameters, prob, smodel, strategy, dataset, priors,
-        phystd, l2std, autodiff, physdt, ninv, initial_nnθ, estim_collocate)
-
-    if verbose
-        @printf("Current Physics Log-likelihood: %g\n", physloglikelihood(ℓπ, initial_θ))
-        @printf("Current Prior Log-likelihood: %g\n", priorweights(ℓπ, initial_θ))
-        @printf("Current MSE against dataset Log-likelihood: %g\n",
-            L2LossData(ℓπ, initial_θ))
-        if estim_collocate
-            @printf("Current gradient loss against dataset Log-likelihood: %g\n",
-                L2loss2(ℓπ, initial_θ))
-        end
-    end
-
-    Adaptor = Adaptorkwargs[:Adaptor]
-    Metric = Adaptorkwargs[:Metric]
-    targetacceptancerate = Adaptorkwargs[:targetacceptancerate]
-
-    # Define Hamiltonian system (nparameters ~ dimensionality of the sampling space)
-    metric = Metric(nparameters)
-    hamiltonian = Hamiltonian(metric, ℓπ, ForwardDiff)
-
-    # parallel sampling option
-    if nchains != 1
-        # Cache to store the chains
-        chains = Vector{Any}(undef, nchains)
-        statsc = Vector{Any}(undef, nchains)
-        samplesc = Vector{Any}(undef, nchains)
-
-        Threads.@threads for i in 1:nchains
-            # each chain has different initial NNparameter values(better posterior exploration)
-            initial_θ = vcat(
-                randn(eltype(initial_θ), nparameters - ninv),
-                initial_θ[(nparameters - ninv + 1):end]
-            )
-            initial_ϵ = find_good_stepsize(hamiltonian, initial_θ)
-            integrator = integratorchoice(Integratorkwargs, initial_ϵ)
-            adaptor = adaptorchoice(Adaptor, MassMatrixAdaptor(metric),
-                StepSizeAdaptor(targetacceptancerate, integrator))
-
-            MCMC_alg = kernelchoice(Kernel, MCMCkwargs)
-            Kernel = AdvancedHMC.make_kernel(MCMC_alg, integrator)
-            samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples, adaptor;
-                progress = progress, verbose = verbose)
-
-            samplesc[i] = samples
-            statsc[i] = stats
-            mcmc_chain = Chains(reduce(hcat, samples)')
-            chains[i] = mcmc_chain
-        end
-
-        return chains, samplesc, statsc
-    else
-        initial_ϵ = find_good_stepsize(hamiltonian, initial_θ)
-        integrator = integratorchoice(Integratorkwargs, initial_ϵ)
-        adaptor = adaptorchoice(Adaptor, MassMatrixAdaptor(metric),
-            StepSizeAdaptor(targetacceptancerate, integrator))
-
-        MCMC_alg = kernelchoice(Kernel, MCMCkwargs)
-        Kernel = AdvancedHMC.make_kernel(MCMC_alg, integrator)
-        samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples,
-            adaptor; progress = progress, verbose = verbose)
-
-        if verbose
-            println("Sampling Complete.")
-            @printf("Current Physics Log-likelihood: %g\n",
-                physloglikelihood(ℓπ, samples[end]))
-            @printf("Current Prior Log-likelihood: %g\n", priorweights(ℓπ, samples[end]))
-            @printf("Current MSE against dataset Log-likelihood: %g\n",
-                L2LossData(ℓπ, samples[end]))
-            if estim_collocate
-                @printf("Current gradient loss against dataset Log-likelihood: %g\n",
-                    L2loss2(ℓπ, samples[end]))
-            end
-        end
-
-        # return a chain(basic chain),samples and stats
-        matrix_samples = reshape(hcat(samples...), (length(samples[1]), length(samples), 1))
-        mcmc_chain = MCMCChains.Chains(matrix_samples)
-        return mcmc_chain, samples, stats
-    end
-end
diff --git a/src/bayesian/BPINN_ode.jl b/src/bayesian/BPINN_ode.jl
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/bayesian/advancedHMC_MCMC.jl b/src/bayesian/advancedHMC_MCMC.jl
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/bayesian/collocated_estim.jl b/src/bayesian/collocated_estim.jl
new file mode 100644
index 0000000000..157388194e
--- /dev/null
+++ b/src/bayesian/collocated_estim.jl
@@ -0,0 +1,194 @@
+# suggested extra loss function
+function L2loss2(Tar::LogTargetDensity, θ)
+    f = Tar.prob.f
+
+    # parameter estimation chosen or not
+    if Tar.extraparams > 0
+        dataset, deri_sol = Tar.dataset
+        # deri_sol = deri_sol'
+        autodiff = Tar.autodiff
+
+        # # Timepoints to enforce Physics
+        # dataset = Array(reduce(hcat, dataset)')
+        # t = dataset[end, :]
+        # û = dataset[1:(end - 1), :]
+
+        # ode_params = Tar.extraparams == 1 ?
+        #              θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
+        #              θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
+
+        # if length(û[:, 1]) == 1
+        #     physsol = [f(û[:, i][1],
+        #         ode_params,
+        #         t[i])
+        #                for i in 1:length(û[1, :])]
+        # else
+        #     physsol = [f(û[:, i],
+        #         ode_params,
+        #         t[i])
+        #                for i in 1:length(û[1, :])]
+        # end
+        # #form of NN output matrix output dim x n
+        # deri_physsol = reduce(hcat, physsol)
+
+        # > for perfect deriv(basically gradient matching in case of an ODEFunction)
+        # in case of PDE or general ODE we would want to reduce residue of f(du,u,p,t)
+        # if length(û[:, 1]) == 1
+        #     deri_sol = [f(û[:, i][1],
+        #         Tar.prob.p,
+        #         t[i])
+        #                 for i in 1:length(û[1, :])]
+        # else
+        #     deri_sol = [f(û[:, i],
+        #         Tar.prob.p,
+        #         t[i])
+        #                 for i in 1:length(û[1, :])]
+        # end
+        # deri_sol = reduce(hcat, deri_sol) 
+        # deri_sol = reduce(hcat, derivatives)
+
+        # Timepoints to enforce Physics 
+        t = dataset[end]
+        u1 = dataset[2]
+        û = dataset[1]
+        # Tar(t, θ[1:(length(θ) - Tar.extraparams)])'
+        #  
+
+        nnsol = NNodederi(Tar, t, θ[1:(length(θ) - Tar.extraparams)], autodiff)
+
+        ode_params = Tar.extraparams == 1 ?
+                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
+                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
+
+        if length(Tar.prob.u0) == 1
+            physsol = [f(û[i],
+                ode_params,
+                t[i])
+                       for i in 1:length(û[:, 1])]
+        else
+            physsol = [f([û[i], u1[i]],
+                ode_params,
+                t[i])
+                       for i in 1:length(û[:, 1])]
+        end
+        #form of NN output matrix output dim x n 
+        deri_physsol = reduce(hcat, physsol)
+
+        # if length(Tar.prob.u0) == 1
+        #     nnsol = [f(û[i],
+        #         Tar.prob.p,
+        #         t[i])
+        #              for i in 1:length(û[:, 1])]
+        # else
+        #     nnsol = [f([û[i], u1[i]],
+        #         Tar.prob.p,
+        #         t[i])
+        #              for i in 1:length(û[:, 1])]
+        # end
+        # form of NN output matrix output dim x n
+        # nnsol = reduce(hcat, nnsol)
+
+        # > Instead of dataset gradients trying NN derivatives with dataset collocation 
+        # # convert to matrix as nnsol  
+
+        physlogprob = 0
+        for i in 1:length(Tar.prob.u0)
+            # can add phystd[i] for u[i] 
+            physlogprob += logpdf(MvNormal(deri_physsol[i, :],
+                    LinearAlgebra.Diagonal(map(abs2,
+                        (Tar.l2std[i] * 4.0) .*
+                        ones(length(nnsol[i, :]))))),
+                nnsol[i, :])
+        end
+        return physlogprob
+    else
+        return 0
+    end
+end
+
+# PDE(DU,U,P,T)=0
+
+# Derivated via Central Diff
+# function calculate_derivatives2(dataset)
+#     x̂, time = dataset
+#     num_points = length(x̂)
+#     # Initialize an array to store the derivative values.
+#     derivatives = similar(x̂)
+
+#     for i in 2:(num_points - 1)
+#         # Calculate the first-order derivative using central differences.
+#         Δt_forward = time[i + 1] - time[i]
+#         Δt_backward = time[i] - time[i - 1]
+
+#         derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
+
+#         derivatives[i] = derivative
+#     end
+
+#     # Derivatives at the endpoints can be calculated using forward or backward differences.
+#     derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
+#     derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
+#     return derivatives
+# end
+
+function calderivatives(prob, dataset)
+    chainflux = Flux.Chain(Flux.Dense(1, 8, tanh), Flux.Dense(8, 8, tanh),
+        Flux.Dense(8, 2)) |> Flux.f64
+    # chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64
+    function loss(x, y)
+        # sum(Flux.mse.(prob.u0[1] .+ (prob.tspan[2] .- x)' .* chainflux(x)[1, :], y[1]) +
+        #     Flux.mse.(prob.u0[2] .+ (prob.tspan[2] .- x)' .* chainflux(x)[2, :], y[2]))
+        # sum(Flux.mse.(prob.u0[1] .+ (prob.tspan[2] .- x)' .* chainflux(x)[1, :], y[1]))
+        sum(Flux.mse.(chainflux(x), y))
+    end
+    optimizer = Flux.Optimise.ADAM(0.01)
+    epochs = 3000
+    for epoch in 1:epochs
+        Flux.train!(loss,
+            Flux.params(chainflux),
+            [(dataset[end]', dataset[1:(end - 1)])],
+            optimizer)
+    end
+
+    # A1 = (prob.u0' .+
+    #   (prob.tspan[2] .- (dataset[end]' .+ sqrt(eps(eltype(Float64)))))' .*
+    #   chainflux(dataset[end]' .+ sqrt(eps(eltype(Float64))))')
+
+    # A2 = (prob.u0' .+
+    #       (prob.tspan[2] .- (dataset[end]'))' .*
+    #       chainflux(dataset[end]')')
+
+    A1 = chainflux(dataset[end]' .+ sqrt(eps(eltype(dataset[end][1]))))
+    A2 = chainflux(dataset[end]')
+
+    gradients = (A2 .- A1) ./ sqrt(eps(eltype(dataset[end][1])))
+
+    return gradients
+end
+
+function calculate_derivatives(dataset)
+
+    # u = dataset[1]
+    # u1 = dataset[2]
+    # t = dataset[end]
+    # # control points
+    # n = Int(floor(length(t) / 10))
+    # # spline for datasetvalues(solution) 
+    # # interp = BSplineApprox(u, t, 4, 10, :Uniform, :Uniform)
+    # interp = CubicSpline(u, t)
+    # interp1 = CubicSpline(u1, t)
+    # # derrivatives interpolation
+    # dx = t[2] - t[1]
+    # time = collect(t[1]:dx:t[end])
+    # smoothu = [interp(i) for i in time]
+    # smoothu1 = [interp1(i) for i in time]
+    # # derivative of the spline (must match function derivative) 
+    # û = tvdiff(smoothu, 20, 0.5, dx = dx, ε = 1)
+    # û1 = tvdiff(smoothu1, 20, 0.5, dx = dx, ε = 1)
+    # # tvdiff(smoothu, 100, 0.035, dx = dx, ε = 1)
+    # # FDM
+    # # û1 = diff(u) / dx
+    # # dataset[1] and smoothu are almost equal(rounding errors)
+    # return [û, û1] 
+
+end
\ No newline at end of file
diff --git a/test/bpinnexperimental.jl b/test/bpinnexperimental.jl
new file mode 100644
index 0000000000..153124b069
--- /dev/null
+++ b/test/bpinnexperimental.jl
@@ -0,0 +1,66 @@
+using Test, MCMCChains
+using ForwardDiff, Distributions, OrdinaryDiffEq
+using Flux, OptimizationOptimisers, AdvancedHMC, Lux
+using Statistics, Random, Functors, ComponentArrays
+using NeuralPDE, MonteCarloMeasurements
+
+Random.seed!(110)
+
+using NeuralPDE, Lux, Plots, OrdinaryDiffEq, Distributions, Random
+
+function lotka_volterra(u, p, t)
+    # Model parameters.
+    α, β, γ, δ = p
+    # Current state.
+    x, y = u
+
+    # Evaluate differential equations.
+    dx = (α - β * y) * x # prey
+    dy = (δ * x - γ) * y # predator
+
+    return [dx, dy]
+end
+
+# initial-value problem.
+u0 = [1.0, 1.0]
+p = [1.5, 1.0, 3.0, 1.0]
+tspan = (0.0, 4.0)
+prob = ODEProblem(lotka_volterra, u0, tspan, p)
+
+# Solve using OrdinaryDiffEq.jl solver
+dt = 0.01
+solution = solve(prob, Tsit5(); saveat = dt)
+
+times = solution.t
+u = hcat(solution.u...)
+x = u[1, :] + (u[1, :]) .* (0.05 .* randn(length(u[1, :])))
+y = u[2, :] + (u[2, :]) .* (0.05 .* randn(length(u[2, :])))
+dataset = [x, y, times]
+
+plot(times, x, label = "noisy x")
+plot!(times, y, label = "noisy y")
+plot!(solution, labels = ["x" "y"])
+
+chain = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),
+    Lux.Dense(6, 2))
+
+alg = BNNODE(chain;
+dataset = dataset,
+draw_samples = 1000,
+l2std = [0.1, 0.1],
+phystd = [0.1, 0.1],
+priorsNNw = (0.0, 3.0),
+param = [
+    Normal(1, 2),
+    Normal(2, 2),
+    Normal(2, 2),
+    Normal(0, 2)], progress = false)
+
+sol_pestim = solve(prob, alg; saveat = dt)
+plot(times, sol_pestim.ensemblesol[1], label = "estimated x")
+plot!(times, sol_pestim.ensemblesol[2], label = "estimated y")
+
+# comparing it with the original solution
+plot!(solution, labels = ["true x" "true y"])
+
+sol_pestim.estimated_ode_params
\ No newline at end of file

From d8d5f4a67856b3f47cfa45ce1abb86fafea7a353 Mon Sep 17 00:00:00 2001
From: Vaibhav Dixit <vaibhavyashdixit@gmail.com>
Date: Fri, 27 Oct 2023 16:58:42 -0400
Subject: [PATCH 068/107] fitzhughnagumo experiment and some edits

---
 src/bayesian/collocated_estim.jl | 10 ++---
 test/bpinnexperimental.jl        | 68 ++++++++++++++++++++++++++++----
 2 files changed, 64 insertions(+), 14 deletions(-)

diff --git a/src/bayesian/collocated_estim.jl b/src/bayesian/collocated_estim.jl
index 157388194e..b113b76f12 100644
--- a/src/bayesian/collocated_estim.jl
+++ b/src/bayesian/collocated_estim.jl
@@ -4,10 +4,8 @@ function L2loss2(Tar::LogTargetDensity, θ)
 
     # parameter estimation chosen or not
     if Tar.extraparams > 0
-        dataset, deri_sol = Tar.dataset
         # deri_sol = deri_sol'
         autodiff = Tar.autodiff
-
         # # Timepoints to enforce Physics
         # dataset = Array(reduce(hcat, dataset)')
         # t = dataset[end, :]
@@ -48,9 +46,9 @@ function L2loss2(Tar::LogTargetDensity, θ)
         # deri_sol = reduce(hcat, derivatives)
 
         # Timepoints to enforce Physics 
-        t = dataset[end]
-        u1 = dataset[2]
-        û = dataset[1]
+        t = Tar.dataset[end]
+        u1 = Tar.dataset[2]
+        û = Tar.dataset[1]
         # Tar(t, θ[1:(length(θ) - Tar.extraparams)])'
         #  
 
@@ -69,7 +67,7 @@ function L2loss2(Tar::LogTargetDensity, θ)
             physsol = [f([û[i], u1[i]],
                 ode_params,
                 t[i])
-                       for i in 1:length(û[:, 1])]
+                       for i in 1:length(û)]
         end
         #form of NN output matrix output dim x n 
         deri_physsol = reduce(hcat, physsol)
diff --git a/test/bpinnexperimental.jl b/test/bpinnexperimental.jl
index 153124b069..ffe7fcf0f8 100644
--- a/test/bpinnexperimental.jl
+++ b/test/bpinnexperimental.jl
@@ -28,13 +28,13 @@ tspan = (0.0, 4.0)
 prob = ODEProblem(lotka_volterra, u0, tspan, p)
 
 # Solve using OrdinaryDiffEq.jl solver
-dt = 0.01
+dt = 0.2
 solution = solve(prob, Tsit5(); saveat = dt)
 
 times = solution.t
 u = hcat(solution.u...)
-x = u[1, :] + (u[1, :]) .* (0.05 .* randn(length(u[1, :])))
-y = u[2, :] + (u[2, :]) .* (0.05 .* randn(length(u[2, :])))
+x = u[1, :] + (u[1, :]) .* (0.3 .* randn(length(u[1, :])))
+y = u[2, :] + (u[2, :]) .* (0.3 .* randn(length(u[2, :])))
 dataset = [x, y, times]
 
 plot(times, x, label = "noisy x")
@@ -54,13 +54,65 @@ param = [
     Normal(1, 2),
     Normal(2, 2),
     Normal(2, 2),
-    Normal(0, 2)], progress = false)
+    Normal(0, 2)], progress = true)
 
-sol_pestim = solve(prob, alg; saveat = dt)
-plot(times, sol_pestim.ensemblesol[1], label = "estimated x")
-plot!(times, sol_pestim.ensemblesol[2], label = "estimated y")
+@time sol_pestim1 = solve(prob, alg; saveat = dt,)
+@time sol_pestim2 = solve(prob, alg; estim_collocate = true, saveat = dt)
+plot(times, sol_pestim1.ensemblesol[1], label = "estimated x1")
+plot!(times, sol_pestim2.ensemblesol[1], label = "estimated x2")
+plot!(times, sol_pestim1.ensemblesol[2], label = "estimated y1")
+plot!(times, sol_pestim2.ensemblesol[2], label = "estimated y2")
 
 # comparing it with the original solution
 plot!(solution, labels = ["true x" "true y"])
 
-sol_pestim.estimated_ode_params
\ No newline at end of file
+@show sol_pestim1.estimated_ode_params
+@show sol_pestim2.estimated_ode_params
+
+function fitz(u, p , t)
+    v, w = u[1], u[2]
+    a,b,τinv,l = p[1], p[2], p[3], p[4]
+    
+    dv = v - 0.33*v^3 -w + l
+    dw = τinv*(v +  a - b*w)
+
+    return [dv, dw]
+end
+
+prob_ode_fitzhughnagumo = ODEProblem(fitz, [1.0,1.0], (0.0,10.0), [0.7,0.8,1/12.5,0.5])
+dt = 0.5
+sol = solve(prob_ode_fitzhughnagumo, Tsit5(), saveat = dt)
+
+sig = 0.20
+data = Array(sol)
+dataset = [data[1,:] .+ (sig .* rand(length(sol.t))), data[2, :] .+ (sig .* rand(length(sol.t))), sol.t]
+priors = [truncated(Normal(0.5,1.0),0,1.5), truncated(Normal(0.5,1.0),0,1.5), truncated(Normal(0.0,0.5),0.0,0.5), truncated(Normal(0.5,1.0),0,1)]
+
+
+plot(sol.t, dataset[1], label = "noisy x")
+plot!(sol.t, dataset[2], label = "noisy y")
+plot!(sol, labels = ["x" "y"])
+
+chain = Lux.Chain(Lux.Dense(1, 10, tanh), Lux.Dense(10, 10, tanh),
+    Lux.Dense(10, 2))
+
+Adaptorkwargs = (Adaptor = AdvancedHMC.StanHMCAdaptor,
+    Metric = AdvancedHMC.DiagEuclideanMetric, targetacceptancerate = 0.65)
+alg = BNNODE(chain;
+dataset = dataset,
+draw_samples = 10000,
+l2std = [0.1, 0.1],
+phystd = [0.1, 0.1],
+priorsNNw = (0.01, 3.0),
+Adaptorkwargs = Adaptorkwargs,
+param = priors, progress = true)
+
+@time sol_pestim1 = solve(prob_ode_fitzhughnagumo, alg; saveat = dt)
+@time sol_pestim2 = solve(prob_ode_fitzhughnagumo, alg; estim_collocate = true, saveat = dt)
+plot!(sol.t, sol_pestim1.ensemblesol[1], label = "estimated x1")
+plot!(sol.t, sol_pestim2.ensemblesol[1], label = "estimated x2")
+plot!(sol.t, sol_pestim1.ensemblesol[2], label = "estimated y1")
+plot!(sol.t, sol_pestim2.ensemblesol[2], label = "estimated y2")
+
+@show sol_pestim1.estimated_ode_params
+@show sol_pestim2.estimated_ode_params
\ No newline at end of file

From dc37133c44ea940e2b84c6b3ba229cda0f207f14 Mon Sep 17 00:00:00 2001
From: Vaibhav Dixit <vaibhavyashdixit@gmail.com>
Date: Sat, 28 Oct 2023 15:14:31 -0400
Subject: [PATCH 069/107] Scale logpdfs and fix chain creation

---
 test/bpinnexperimental.jl | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/test/bpinnexperimental.jl b/test/bpinnexperimental.jl
index ffe7fcf0f8..3de049bf58 100644
--- a/test/bpinnexperimental.jl
+++ b/test/bpinnexperimental.jl
@@ -86,7 +86,7 @@ sol = solve(prob_ode_fitzhughnagumo, Tsit5(), saveat = dt)
 sig = 0.20
 data = Array(sol)
 dataset = [data[1,:] .+ (sig .* rand(length(sol.t))), data[2, :] .+ (sig .* rand(length(sol.t))), sol.t]
-priors = [truncated(Normal(0.5,1.0),0,1.5), truncated(Normal(0.5,1.0),0,1.5), truncated(Normal(0.0,0.5),0.0,0.5), truncated(Normal(0.5,1.0),0,1)]
+priors = [Normal(0.5,1.0), Normal(0.5,1.0), Normal(0.0,0.5), Normal(0.5,1.0)]
 
 
 plot(sol.t, dataset[1], label = "noisy x")
@@ -97,22 +97,22 @@ chain = Lux.Chain(Lux.Dense(1, 10, tanh), Lux.Dense(10, 10, tanh),
     Lux.Dense(10, 2))
 
 Adaptorkwargs = (Adaptor = AdvancedHMC.StanHMCAdaptor,
-    Metric = AdvancedHMC.DiagEuclideanMetric, targetacceptancerate = 0.65)
+    Metric = AdvancedHMC.DiagEuclideanMetric, targetacceptancerate = 0.8)
 alg = BNNODE(chain;
 dataset = dataset,
-draw_samples = 10000,
+draw_samples = 1000,
 l2std = [0.1, 0.1],
 phystd = [0.1, 0.1],
 priorsNNw = (0.01, 3.0),
 Adaptorkwargs = Adaptorkwargs,
 param = priors, progress = true)
 
-@time sol_pestim1 = solve(prob_ode_fitzhughnagumo, alg; saveat = dt)
-@time sol_pestim2 = solve(prob_ode_fitzhughnagumo, alg; estim_collocate = true, saveat = dt)
-plot!(sol.t, sol_pestim1.ensemblesol[1], label = "estimated x1")
-plot!(sol.t, sol_pestim2.ensemblesol[1], label = "estimated x2")
-plot!(sol.t, sol_pestim1.ensemblesol[2], label = "estimated y1")
-plot!(sol.t, sol_pestim2.ensemblesol[2], label = "estimated y2")
+@time sol_pestim3 = solve(prob_ode_fitzhughnagumo, alg; saveat = dt)
+@time sol_pestim4 = solve(prob_ode_fitzhughnagumo, alg; estim_collocate = true, saveat = dt)
+plot!(sol.t, sol_pestim3.ensemblesol[1], label = "estimated x1")
+plot!(sol.t, sol_pestim4.ensemblesol[1], label = "estimated x2")
+plot!(sol.t, sol_pestim3.ensemblesol[2], label = "estimated y1")
+plot!(sol.t, sol_pestim4.ensemblesol[2], label = "estimated y2")
 
-@show sol_pestim1.estimated_ode_params
-@show sol_pestim2.estimated_ode_params
\ No newline at end of file
+@show sol_pestim3.estimated_ode_params
+@show sol_pestim4.estimated_ode_params

From 0deb44682b0455cffee6c70056b0157a8d0a70e4 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Sun, 4 Feb 2024 02:24:06 +0530
Subject: [PATCH 070/107] keeping bayesian directory files in sync with master

---
 src/{bayesian => }/BPINN_ode.jl        | 0
 src/{bayesian => }/advancedHMC_MCMC.jl | 0
 src/{bayesian => }/collocated_estim.jl | 0
 3 files changed, 0 insertions(+), 0 deletions(-)
 rename src/{bayesian => }/BPINN_ode.jl (100%)
 rename src/{bayesian => }/advancedHMC_MCMC.jl (100%)
 rename src/{bayesian => }/collocated_estim.jl (100%)

diff --git a/src/bayesian/BPINN_ode.jl b/src/BPINN_ode.jl
similarity index 100%
rename from src/bayesian/BPINN_ode.jl
rename to src/BPINN_ode.jl
diff --git a/src/bayesian/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl
similarity index 100%
rename from src/bayesian/advancedHMC_MCMC.jl
rename to src/advancedHMC_MCMC.jl
diff --git a/src/bayesian/collocated_estim.jl b/src/collocated_estim.jl
similarity index 100%
rename from src/bayesian/collocated_estim.jl
rename to src/collocated_estim.jl

From 4c5c5ca80ef2f71c22881a74ba7292ee623ffcb8 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Sun, 4 Feb 2024 02:47:52 +0530
Subject: [PATCH 071/107] keep new dir

---
 src/NeuralPDE.jl                       | 4 ++--
 src/{ => bayesian}/BPINN_ode.jl        | 0
 src/{ => bayesian}/advancedHMC_MCMC.jl | 0
 src/{ => bayesian}/collocated_estim.jl | 0
 4 files changed, 2 insertions(+), 2 deletions(-)
 rename src/{ => bayesian}/BPINN_ode.jl (100%)
 rename src/{ => bayesian}/advancedHMC_MCMC.jl (100%)
 rename src/{ => bayesian}/collocated_estim.jl (100%)

diff --git a/src/NeuralPDE.jl b/src/NeuralPDE.jl
index c0798c6270..ff7c720d52 100644
--- a/src/NeuralPDE.jl
+++ b/src/NeuralPDE.jl
@@ -84,8 +84,8 @@ include("transform_inf_integral.jl")
 include("discretize.jl")
 
 include("neural_adapter.jl")
-include("advancedHMC_MCMC.jl")
-include("BPINN_ode.jl")
+include("bayesian/advancedHMC_MCMC.jl")
+include("bayesian/BPINN_ode.jl")
 include("PDE_BPINN.jl")
 
 include("dgm.jl")
diff --git a/src/BPINN_ode.jl b/src/bayesian/BPINN_ode.jl
similarity index 100%
rename from src/BPINN_ode.jl
rename to src/bayesian/BPINN_ode.jl
diff --git a/src/advancedHMC_MCMC.jl b/src/bayesian/advancedHMC_MCMC.jl
similarity index 100%
rename from src/advancedHMC_MCMC.jl
rename to src/bayesian/advancedHMC_MCMC.jl
diff --git a/src/collocated_estim.jl b/src/bayesian/collocated_estim.jl
similarity index 100%
rename from src/collocated_estim.jl
rename to src/bayesian/collocated_estim.jl

From 0a30bd37853d58c4f91ece6e286beecebeb3a870 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Mon, 12 Feb 2024 19:40:25 +0530
Subject: [PATCH 072/107] having problems with eval() call in recursive Dict
 creation

---
 src/PDE_BPINN.jl           | 36 +++++++++++++++++++-----------------
 src/training_strategies.jl | 21 +++++++++++++++++++++
 2 files changed, 40 insertions(+), 17 deletions(-)

diff --git a/src/PDE_BPINN.jl b/src/PDE_BPINN.jl
index ae5c9d98c8..9ce32a7e6a 100644
--- a/src/PDE_BPINN.jl
+++ b/src/PDE_BPINN.jl
@@ -56,7 +56,7 @@ function L2LossData(ltd::PDELogTargetDensity, θ)
     # dataset of form Vector[matrix_x, matrix_y, matrix_z]
     # matrix_i is of form [i,indvar1,indvar2,..] (needed in case if heterogenous domains)
     # note that indvar1,indvar2.. cols can be different values for different depvar matrices
-    # order follows pinnrep.depvars orders of variables (order of declaration in @variables macro)
+    # dataset,phi order follows pinnrep.depvars orders of variables (order of declaration in @variables macro)
 
     # Phi is the trial solution for each NN in chain array
     # Creating logpdf( MvNormal(Phi(t,θ),std), dataset[i] )
@@ -265,28 +265,30 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
     dataset_pde, dataset_bc = discretization.dataset
 
     eqs = pinnrep.eqs
-    yuh1 = get_loss_2(pinnrep, dataset_pde, eqs)
-    eqs = pinnrep.bcs
-    yuh2 = get_loss_2(pinnrep, dataset_bc, eqs)
+    yuh1 = get_lossy(pinnrep, dataset_pde, eqs)
+    # eqs = pinnrep.bcs
+    # yuh2 = get_lossy(pinnrep, dataset_pde, eqs)
 
-    pde_loss_functions, bc_loss_functions = merge_dataset_with_loss_function(pinnrep,
-        dataset,
-        yuh1,
-        yuh2)
+    pde_loss_functions = [merge_strategy_with_loglikelihood_function(pinnrep::PINNRepresentation,
+        GridTraining(0.1),
+        yuh1[i],
+        nothing; train_sets_pde = [data_pde[i, :] for data_pde in dataset_pde],
+        train_sets_bc = nothing)[1]
+                          for i in eachindex(yuh1)]
 
     function L2_loss2(θ, allstd)
         stdpdes, stdbcs, stdextra = allstd
-        pde_loglikelihoods = [logpdf(Normal(0, stdpdes[i]), pde_loss_function(θ))
-                              for (i, pde_loss_function) in enumerate(pde_loss_functions)]
-
-        bc_loglikelihoods = [logpdf(Normal(0, stdbcs[j]), bc_loss_function(θ))
-                             for (j, bc_loss_function) in enumerate(bc_loss_functions)]
-        println("pde_loglikelihoods : ", pde_loglikelihoods)
-        println("bc_loglikelihoods : ", bc_loglikelihoods)
-        return sum(sum(pde_loglikelihoods) + sum(bc_loglikelihoods))
+        pde_loglikelihoods = [[logpdf(Normal(0, 0.8 * stdpdes[i]), pde_loss_function(θ))
+                               for (i, pde_loss_function) in enumerate(pde_loss_functions[i])]
+                              for i in eachindex(pde_loss_functions)]
+
+        # bc_loglikelihoods = [logpdf(Normal(0, stdbcs[j]), bc_loss_function(θ))
+        #                      for (j, bc_loss_function) in enumerate(bc_loss_functions)]
+        # println("bc_loglikelihoods : ", bc_loglikelihoods)
+        return sum(sum(pde_loglikelihoods))
+        # sum(sum(pde_loglikelihoods) + sum(bc_loglikelihoods))
     end
 
-    println(L2_loss2)
     # WIP split dataset to respective equations
     if ((dataset_bc isa Nothing) && (dataset_pde isa Nothing))
         dataset = nothing
diff --git a/src/training_strategies.jl b/src/training_strategies.jl
index 974f2529fa..9b40cc10ce 100644
--- a/src/training_strategies.jl
+++ b/src/training_strategies.jl
@@ -14,6 +14,27 @@ corresponding to the grid spacing in each dimension.
     dx
 end
 
+function get_dataset_train_points(eqs, train_sets, pinnrep)
+    dict_depvar_input = pinnrep.dict_depvar_input
+    depvars = pinnrep.depvars
+    dict_depvars = pinnrep.dict_depvars
+    dict_indvars = pinnrep.dict_indvars
+
+    symbols_input = [(i, dict_depvar_input[i]) for i in depvars]
+    eq_args = NeuralPDE.get_argument(eqs, dict_indvars, dict_depvars)
+    points = []
+    for eq_arg in eq_args
+        a = []
+        for i in eachindex(symbols_input)
+            if symbols_input[i][2] == eq_arg
+                push!(a, train_sets[i][:, 2:end]')
+            end
+        end
+        push!(points, vcat(a...))
+    end
+    return points
+end
+
 # include dataset points in pde_residual loglikelihood (BayesianPINN)
 function merge_strategy_with_loglikelihood_function(pinnrep::PINNRepresentation,
         strategy::GridTraining, datafree_pde_loss_function,

From 58a98c5914fc36c6bd3746ba865bed4a82792192 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Mon, 12 Feb 2024 19:44:39 +0530
Subject: [PATCH 073/107] removed bayesian folder

---
 src/{bayesian => }/BPINN_ode.jl        | 0
 src/NeuralPDE.jl                       | 4 ++--
 src/{bayesian => }/advancedHMC_MCMC.jl | 0
 src/{bayesian => }/collocated_estim.jl | 0
 4 files changed, 2 insertions(+), 2 deletions(-)
 rename src/{bayesian => }/BPINN_ode.jl (100%)
 rename src/{bayesian => }/advancedHMC_MCMC.jl (100%)
 rename src/{bayesian => }/collocated_estim.jl (100%)

diff --git a/src/bayesian/BPINN_ode.jl b/src/BPINN_ode.jl
similarity index 100%
rename from src/bayesian/BPINN_ode.jl
rename to src/BPINN_ode.jl
diff --git a/src/NeuralPDE.jl b/src/NeuralPDE.jl
index ff7c720d52..c0798c6270 100644
--- a/src/NeuralPDE.jl
+++ b/src/NeuralPDE.jl
@@ -84,8 +84,8 @@ include("transform_inf_integral.jl")
 include("discretize.jl")
 
 include("neural_adapter.jl")
-include("bayesian/advancedHMC_MCMC.jl")
-include("bayesian/BPINN_ode.jl")
+include("advancedHMC_MCMC.jl")
+include("BPINN_ode.jl")
 include("PDE_BPINN.jl")
 
 include("dgm.jl")
diff --git a/src/bayesian/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl
similarity index 100%
rename from src/bayesian/advancedHMC_MCMC.jl
rename to src/advancedHMC_MCMC.jl
diff --git a/src/bayesian/collocated_estim.jl b/src/collocated_estim.jl
similarity index 100%
rename from src/bayesian/collocated_estim.jl
rename to src/collocated_estim.jl

From cff28276c99da91ce365f72e810e1e59d5129190 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Mon, 12 Feb 2024 19:58:41 +0530
Subject: [PATCH 074/107] cleaned files, removed DataInterpolations

---
 test/BPINN_PDEinvsol_tests.jl | 363 ++++++++++++++++++++++------------
 1 file changed, 233 insertions(+), 130 deletions(-)

diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index 4876328413..9461c088ea 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -183,106 +183,62 @@ sol1 = ahmc_bayesian_pinn_pde(pde_system,
     saveats = [0.01],
     param = [Normal(12.0, 2)])
 
-idealp = 10.0
-p_ = sol1.estimated_de_params[1]
-
-# plot(pmean(sol1.ensemblesol[1]), pmean(sol1.ensemblesol[2]), pmean(sol1.ensemblesol[3]))
-# plot(sol1.timepoints[1]', pmean(sol1.ensemblesol[1]))
-# plot!(sol1.timepoints[2]', pmean(sol1.ensemblesol[2]))
-# plot!(sol1.timepoints[3]', pmean(sol1.ensemblesol[3]))
-
-@test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
-# @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
-
-# # NEW LOSS FUNCTION CODE
-# pinnrep = symbolic_discretize(pde_system, discretization)
-
-# # general equation with diff
-# # now 1> substute u(t), phi(t) values from dataset and get multiple equations
-# # phi[i] must be in numeric_derivative() form
-# # derivative(phi, u, [x, y], εs, order, θ) - use parse_equations() and interp object to create loss function
-# # this function must take interp objects(train sets)
-# # dataset - get u(t), t from dataset interpolations object
-# # make lhs-rhs loss
-# # sum losses
-
-# using DataInterpolations
-
-# # dataset_pde has normal matrix format 
-# # dataset_bc has format of Vector{typeof(dataset_pde )} as each bc has different domain requirements
-# function get_symbols(dict_depvar_input, dataset, depvars)
-#     # get datasets into splattable form
-#     splat_form = [[dataset_i[:, i] for i in 1:size(dataset_i)[2]] for dataset_i in dataset]
-#     # splat datasets onto Linear interpolations tables
-#     interps = [LinearInterpolation(splat_i...) for splat_i in splat_form]
-#     interps = Dict(depvars .=> interps)
-
-#     Dict_symbol_interps = Dict(depvar => (interps[depvar], dict_depvar_input[depvar])
-#                                for depvar in depvars)
-
-#     tobe_subs = Dict()
-#     for (a, b) in dict_depvar_input
-#         tobe_subs[a] = eval(:($a($(b...))))
-#     end
-
-#     to_subs = Dict()
-#     for (a, b) in Dict_symbol_interps
-#         b1, b2 = b
-#         to_subs[a] = eval(:($b1($(b2...))))
-#     end
-#     return to_subs, tobe_subs
-# end
-
-# function recur_expression(exp, Dict_differentials)
-#     for in_exp in exp.args
-#         if !(in_exp isa Expr)
-#             # skip +,== symbols, characters etc
-#             continue
-
-#         elseif in_exp.args[1] isa ModelingToolkit.Differential
-#             # first symbol of differential term
-#             # Dict_differentials for masking differential terms
-#             # and resubstituting differentials in equations after putting in interpolations
-#             Dict_differentials[eval(in_exp)] = Symbol("diff_$(length(Dict_differentials)+1)")
-#             return
-
-#         else
-#             recur_expression(in_exp, Dict_differentials)
-#         end
-#     end
-# end
-
-# # get datafree loss functions for new loss type
-# # need to call merge_strategy_with_loss_function() variant after this
-# function merge_dataset_with_loss_function(pinnrep::NeuralPDE.PINNRepresentation,
-#         dataset,
-#         datafree_pde_loss_function,
-#         datafree_bc_loss_function)
-#     @unpack domains, eqs, bcs, dict_indvars, dict_depvars, flat_init_params = pinnrep
-
-#     eltypeθ = eltype(pinnrep.flat_init_params)
-
-#     train_sets = [[dataset[i][:, 2] for i in eachindex(dataset)], [[0;;], [0;;], [0;;]]]
-
-#     # the points in the domain and on the boundary
-#     pde_train_sets, bcs_train_sets = train_sets
-#     # pde_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)),
-#     #     pde_train_sets)
-#     # bcs_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)),
-#     #     bcs_train_sets)
-#     pde_loss_functions = [get_loss_function(_loss, _set, eltypeθ)
-#                           for (_loss, _set) in zip(datafree_pde_loss_function,
-#         pde_train_sets)]
-
-#     bc_loss_functions = [get_loss_function(_loss, _set, eltypeθ)
-#                          for (_loss, _set) in zip(datafree_bc_loss_function, bcs_train_sets)]
-
-#     pde_loss_functions, bc_loss_functions
-# end
-
-# function get_loss_function(loss_function, train_set, eltypeθ; τ = nothing)
-#     loss = (θ) -> mean(abs2, loss_function(train_set, θ))
-# end
+    idealp = 10.0
+    p_ = sol1.estimated_de_params[1]
+    @test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
+    # @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
+end
+
+@parameters t, x, p
+@variables u(..)
+Dt = Differential(t)
+Dx = Differential(x)
+eqs = [u(t, x) * Dt(u(t, x)) - cos(p * t) ~ 0, u(t, x) + Dx(u(t, x)) ~ 0.0]
+bcs = [u(0, x) ~ 0.0, u(t, 10) ~ 1.0]
+domains = [t ∈ Interval(0.0, 2.0), x ∈ Interval(0.0, 2.0)]
+
+chainl = Lux.Chain(Lux.Dense(2, 6, tanh), Lux.Dense(6, 1))
+initl, st = Lux.setup(Random.default_rng(), chainl)
+
+@named pde_system = PDESystem(eqs,
+    bcs,
+    domains,
+    [t, x],
+    [u(t, x)],
+    [p],
+    defaults = Dict([p => 4.0]))
+
+analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
+timepoints = collect(0.0:(1 / 100.0):2.0)
+u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
+u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
+dataset = [hcat(u1, u1, timepoints)]
+
+# checking all training strategies
+# discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
+#     dataset = [dataset, nothing])
+
+discretization = BayesianPINN([chainl],
+    GridTraining([0.2, 0.2]),
+    param_estim = true, dataset = [dataset, nothing])
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 1500,
+    bcstd = [0.05, 0.05],
+    phystd = [0.01, 0.01], l2std = [0.01],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 50.0, 1 / 20.0],
+    param = [Normal(3.0, 0.5)], progress = true)
+
+param = 2 * π
+ts = vec(sol1.timepoints[1])
+u_real = [analytic_sol_func1(0.0, t) for t in ts]
+u_predict = pmean(sol1.ensemblesol[1])
+
+@test u_predict≈u_real atol=1.5
+@test mean(u_predict .- u_real) < 0.1
+@test sol1.estimated_de_params[1]≈param atol=param * 0.3
 
 # # for bc case, [bc]/bc eqs must be passed along with dataset_bc[i]
 # # and final loss for bc must be together in a vector(bcs has seperate type of dataset_bc)
@@ -300,34 +256,143 @@ p_ = sol1.estimated_de_params[1]
 #     void_value = [recur_expression(exp_i, Dict_differentials) for exp_i in exp]
 #     # Dict_differentials is now filled with Differential operator => diff_i key-value pairs
 
-#     # masking operation
-#     a = substitute.(eqs, Ref(Dict_differentials))
-#     b = substitute.(a, Ref(interp_subs_dict))
-#     # reverse dict for re-substituing values of Differential(t)(u(t)) etc
-#     rev_Dict_differentials = Dict(value => key for (key, value) in Dict_differentials)
-#     eqs = substitute.(b, Ref(rev_Dict_differentials))
-#     # get losses
-#     loss_functions = [NeuralPDE.build_loss_function(pinnrep,
-#         eqs[i],
-#         pinnrep.pde_indvars[i]) for i in eachindex(eqs)]
-# end
-
-# eqs = pde_system.eqs
-# yuh1 = get_loss_2(pinnrep, dataset, eqs)
-# eqs = pinnrep.bcs
-# yuh2 = get_loss_2(pinnrep, dataset, eqs)
-
-# pde_loss_functions, bc_loss_functions = merge_dataset_with_loss_function(pinnrep,
-#     dataset,
-#     yuh1,
-#     yuh2)
-
-# pde_loss_functions()
-# # logic for recursion formula to parse differentials
-# # # this below has the whole differential term
-# # toexpr(pde_system.eqs[1]).args[2].args[3].args[3] isa ModelingToolkit.Differential
-# # toexpr(pde_system.eqs[1]).args[2].args[3].args[3]
-# # # .args[1] isa ModelingToolkit.Differential
+    # masking operation
+    a = substitute.(eqs, Ref(Dict_differentials))
+    println(a)
+    b = substitute.(a, Ref(interp_subs_dict))
+    println(b)
+    # reverse dict for re-substituing values of Differential(t)(u(t)) etc
+    rev_Dict_differentials = Dict(value => key for (key, value) in Dict_differentials)
+    eqs = substitute.(b, Ref(rev_Dict_differentials))
+    # get losses
+    loss_functions = [NeuralPDE.build_loss_function(pinnrep,
+        eqs[i],
+        pinnrep.pde_indvars[i]) for i in eachindex(eqs)]
+end
+
+# >why not mask differential
+function get_lossy(pinnrep, dataset, eqs)
+    depvars = pinnrep.depvars # order is same as dataset and interps
+    dict_depvar_input = pinnrep.dict_depvar_input
+
+    Dict_differentials = Dict()
+    exp = toexpr(eqs)
+    for exp_i in exp
+        recur_expression(exp_i, Dict_differentials)
+    end
+    # Dict_differentials is now filled with Differential operator => diff_i key-value pairs
+
+    # masking operation
+    println("Dict_differentials : ", Dict_differentials)
+    a = substitute.(eqs, Ref(Dict_differentials))
+    println("Masked Differential term : ", a)
+
+    to_subs, tobe_subs = get_symbols(dict_depvar_input, dataset, depvars, eqs)
+    # for each row in dataset create u values for substituing in equation, n_equations=n_rows
+    eq_subs = [Dict(tobe_subs[depvar] => to_subs[depvar][i] for depvar in depvars)
+               for i in 1:size(dataset[1][:, 1])[1]]
+
+    b = []
+    for eq_sub in eq_subs
+        push!(b, [substitute(a_i, eq_sub) for a_i in a])
+    end
+
+    # reverse dict for re-substituing values of Differential(t)(u(t)) etc
+    rev_Dict_differentials = Dict(value => key for (key, value) in Dict_differentials)
+
+    c = []
+    for b_i in b
+        push!(c, substitute.(b_i, Ref(rev_Dict_differentials)))
+    end
+    println("After re Substituting depvars : ", c[1])
+    # c = vcat(c...)
+    println(c)
+    c
+    # get losses
+    # loss_functions = [NeuralPDE.build_loss_function(pinnrep,
+    #     c[i, :][j],
+    #     pinnrep.pde_indvars[j]) for j in eachindex(pinnrep.pde_indvars)]
+    # return loss_functions
+end
+
+# dataset_pde has normal matrix format 
+# dataset_bc has format of Vector{typeof(dataset_pde )} as each bc has different domain requirements
+function get_symbols(dict_depvar_input, dataset, depvars, eqs)
+    depvar_vals = [dataset_i[:, 1] for dataset_i in dataset]
+    # order of depvars
+    to_subs = Dict(pinnrep.depvars .=> depvar_vals)
+
+    asrt = Symbolics.get_variables.(eqs)
+    # want only symbols of depvars
+    temp = unique(reduce(vcat, asrt))
+    # now we have all the depvars, we now need all depvars whcih can be substituted with data interps
+
+    tobe_subs = Dict()
+    for a in depvars
+        for i in temp
+            expr = toexpr(i)
+            if (expr isa Expr) && (expr.args[1] == a)
+                tobe_subs[a] = i
+            end
+        end
+    end
+
+    return to_subs, tobe_subs
+end
+
+yuh = get_symbols(pinnrep.dict_depvar_input, dataset, pinnrep.depvars, pinnrep.eqs)
+
+function recur_expression(exp, Dict_differentials)
+    for in_exp in exp.args
+        if !(in_exp isa Expr)
+            # skip +,== symbols, characters etc
+            continue
+
+        elseif in_exp.args[1] isa ModelingToolkit.Differential
+            # first symbol of differential term
+            # Dict_differentials for masking differential terms
+            # and resubstituting differentials in equations after putting in interpolations
+            # temp = in_exp.args[end]
+            # in_exp.args[end] = Symbolics.variable(in_exp.args[end])
+
+            Dict_differentials[in_exp] = Symbolics.variable("diff_$(length(Dict_differentials)+1)")
+            return
+        else
+            recur_expression(in_exp, Dict_differentials)
+        end
+    end
+end
+vars = Symbolics.variable.(hcat(pinnrep.indvars, pinnrep.depvars))
+toexpr(Differential(t)(Differential(u)(u(t))) + u(t) ~ 0).args[2]
+eqs
+# Differential(t)(u(t)) - cos(p * t) ~ 0
+exprs = toexpr(eqs)
+pop = Dict()
+recur_expression(exprs, pop)
+pop1 = Dict()
+for (a, b) in pop
+    pop1[eval(a)] = b
+end
+pop1
+a = substitute(eqs, pop1)
+
+Symbolics.get_variables(eqs[1])
+# eqs=a
+
+NeuralPDE.get_variables(pinnrep.eqs, pinnrep.dict_indvars, pinnrep.dict_depvars)
+NeuralPDE.get_argument(pinnrep.bcs, pinnrep.dict_indvars, pinnrep.dict_depvars)
+dx = pinnrep.strategy.dx
+eltypeθ = eltype(pinnrep.flat_init_params)
+
+# solve dataset physics loss for heterogenous case
+# create number of equations as number of interpolation and points(n rows)
+# follow masking and finally feed training sets as set in interpolations input of u(t,x,..)
+
+# logic for recursion formula to parse differentials
+# # this below has the whole differential term
+toexpr(pde_system.eqs[1]).args[2].args[3].args[3]
+# toexpr(pde_system.eqs[1]).args[2].args[3].args[3]
+# # .args[1] isa ModelingToolkit.Differential
 
 # # logic for interpolation and indvars splatting to get Equation parsing terms
 # # splat_form = [[dataset_i[:, i] for i in 1:size(dataset_i)[2]] for dataset_i in dataset]
@@ -340,5 +405,43 @@ p_ = sol1.estimated_de_params[1]
 # # yu = [LinearInterpolation(splat_i...) for splat_i in splat_form]
 # # Symbol(:($(yu[1])))
 
-# # logic to contrauct dict to feed for masking
-# # Dict(interps[depvar] => dict_depvar_input[depvar] for depvar in depvars)
\ No newline at end of file
+# logic to contrauct dict to feed for masking
+# Dict(interps[depvar] => dict_depvar_input[depvar] for depvar in depvars)
+
+@parameters t, p
+@variables u(..)
+
+Dt = Differential(t)
+eqs = Dt(u(t)) - cos(p * t) ~ 0
+bcs = [u(0) ~ 0.0]
+domains = [t ∈ Interval(0.0, 2.0)]
+
+chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
+initl, st = Lux.setup(Random.default_rng(), chainl)
+
+@named pde_system = PDESystem(eqs,
+    bcs,
+    domains,
+    [t],
+    [u(t)],
+    [p],
+    defaults = Dict([p => 4.0]))
+
+analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
+timepoints = collect(0.0:(1 / 100.0):2.0)
+u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
+u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
+dataset = [hcat(u1, timepoints)]
+
+# checking all training strategies
+discretization = BayesianPINN([chainl], GridTraining(0.01), param_estim = true,
+    dataset = [dataset, nothing])
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 1500,
+    bcstd = [0.05],
+    phystd = [0.01], l2std = [0.01],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 50.0],
+    param = [LogNormal(6.0, 0.5)], progress = true)
\ No newline at end of file

From 8b633b2607959afcbf08ddfc229bf5f3e4556475 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Thu, 15 Feb 2024 02:52:44 +0530
Subject: [PATCH 075/107] done with implementation

---
 src/PDE_BPINN.jl              |  16 +-
 src/training_strategies.jl    |  59 ++++--
 test/BPINN_PDEinvsol_tests.jl | 350 ++++++++++++----------------------
 3 files changed, 167 insertions(+), 258 deletions(-)

diff --git a/src/PDE_BPINN.jl b/src/PDE_BPINN.jl
index 9ce32a7e6a..6ee11fbc7c 100644
--- a/src/PDE_BPINN.jl
+++ b/src/PDE_BPINN.jl
@@ -260,31 +260,33 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
         Adaptorkwargs = (Adaptor = StanHMCAdaptor,
             Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
         Integratorkwargs = (Integrator = Leapfrog,), saveats = [1 / 10.0],
-        numensemble = floor(Int, draw_samples / 3), progress = false, verbose = false)
+        numensemble = floor(Int, draw_samples / 3), Dict_differentials = Dict(),
+        progress = false, verbose = false)
     pinnrep = symbolic_discretize(pde_system, discretization)
     dataset_pde, dataset_bc = discretization.dataset
 
-    eqs = pinnrep.eqs
-    yuh1 = get_lossy(pinnrep, dataset_pde, eqs)
+    yuh1 = get_lossy(pinnrep, dataset_pde, Dict_differentials)
     # eqs = pinnrep.bcs
     # yuh2 = get_lossy(pinnrep, dataset_pde, eqs)
 
+    # this is a vector of tuple{vector,nothing}
     pde_loss_functions = [merge_strategy_with_loglikelihood_function(pinnrep::PINNRepresentation,
         GridTraining(0.1),
         yuh1[i],
         nothing; train_sets_pde = [data_pde[i, :] for data_pde in dataset_pde],
-        train_sets_bc = nothing)[1]
+        train_sets_bc = nothing)
                           for i in eachindex(yuh1)]
 
     function L2_loss2(θ, allstd)
         stdpdes, stdbcs, stdextra = allstd
-        pde_loglikelihoods = [[logpdf(Normal(0, 0.8 * stdpdes[i]), pde_loss_function(θ))
-                               for (i, pde_loss_function) in enumerate(pde_loss_functions[i])]
+        # first vector of losses,from tuple -> pde losses, first[1] pde loss
+        pde_loglikelihoods = [[logpdf(Normal(0, stdpdes[j]), pde_loss_function(θ))
+                               for (j, pde_loss_function) in enumerate(pde_loss_functions[i][1])]
                               for i in eachindex(pde_loss_functions)]
 
         # bc_loglikelihoods = [logpdf(Normal(0, stdbcs[j]), bc_loss_function(θ))
         #                      for (j, bc_loss_function) in enumerate(bc_loss_functions)]
-        # println("bc_loglikelihoods : ", bc_loglikelihoods)
+
         return sum(sum(pde_loglikelihoods))
         # sum(sum(pde_loglikelihoods) + sum(bc_loglikelihoods))
     end
diff --git a/src/training_strategies.jl b/src/training_strategies.jl
index 9b40cc10ce..9c78d9979a 100644
--- a/src/training_strategies.jl
+++ b/src/training_strategies.jl
@@ -21,15 +21,22 @@ function get_dataset_train_points(eqs, train_sets, pinnrep)
     dict_indvars = pinnrep.dict_indvars
 
     symbols_input = [(i, dict_depvar_input[i]) for i in depvars]
+    # [(:u, [:t])]
     eq_args = NeuralPDE.get_argument(eqs, dict_indvars, dict_depvars)
+    # [[:t]]
+
     points = []
     for eq_arg in eq_args
         a = []
+        # for each (depvar,[indvar1..]) if indvari==indvar (eq_arg)
         for i in eachindex(symbols_input)
             if symbols_input[i][2] == eq_arg
+                # include domain points of that depvar
+                # each loss equation take domain matrix [points..;points..]
                 push!(a, train_sets[i][:, 2:end]')
             end
         end
+        # vcat as new row for next equation
         push!(points, vcat(a...))
     end
     return points
@@ -41,21 +48,31 @@ function merge_strategy_with_loglikelihood_function(pinnrep::PINNRepresentation,
         datafree_bc_loss_function; train_sets_pde = nothing, train_sets_bc = nothing)
     eltypeθ = recursive_eltype(pinnrep.flat_init_params)
     adaptor = EltypeAdaptor{eltypeθ}()
+        strategy::GridTraining,
+        datafree_pde_loss_function,
+        datafree_bc_loss_function; train_sets_pde = nothing, train_sets_bc=nothing)
+    @unpack domains, eqs, bcs, dict_indvars, dict_depvars, flat_init_params = pinnrep
+
+    eltypeθ = eltype(pinnrep.flat_init_params)
 
-    # is vec as later each _set in pde_train_sets are columns as points transformed to
-    # vector of points (pde_train_sets must be rowwise)
-    pde_loss_functions = if train_sets_pde !== nothing
-        pde_train_sets = [train_set[:, 2:end] for train_set in train_sets_pde] |> adaptor
-        [get_loss_function(pinnrep, _loss, _set, eltypeθ, strategy)
-         for (_loss, _set) in zip(datafree_pde_loss_function, pde_train_sets)]
+    # is vec as later each _set in pde_train_sets are columns as points transformed to vector of points (pde_train_sets must be rowwise)
+    pde_loss_functions = if !(train_sets_pde isa Nothing)
+        pde_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)),
+            train_sets_pde)
+
+        [get_loss_function(_loss, _set, eltypeθ, strategy)
+                              for (_loss, _set) in zip(datafree_pde_loss_function,
+            pde_train_sets)]
     else
         nothing
     end
+    
+    bc_loss_functions = if !(train_sets_bc isa Nothing)
+        bcs_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)),
+            train_sets_bc)
 
-    bc_loss_functions = if train_sets_bc !== nothing
-        bcs_train_sets = [train_set[:, 2:end] for train_set in train_sets_bc] |> adaptor
-        [get_loss_function(pinnrep, _loss, _set, eltypeθ, strategy)
-         for (_loss, _set) in zip(datafree_bc_loss_function, bcs_train_sets)]
+        [get_loss_function(_loss, _set, eltypeθ, strategy)
+                         for (_loss, _set) in zip(datafree_bc_loss_function, bcs_train_sets)]
     else
         nothing
     end
@@ -73,15 +90,19 @@ function merge_strategy_with_loss_function(pinnrep::PINNRepresentation,
         dict_indvars, dict_depvars)
 
     # the points in the domain and on the boundary
-    pde_train_sets, bcs_train_sets = train_sets |> adaptor
-    pde_loss_functions = [get_loss_function(pinnrep, _loss, _set, eltypeθ, strategy)
-                          for (_loss, _set) in zip(
-        datafree_pde_loss_function, pde_train_sets)]
-
-    bc_loss_functions = [get_loss_function(pinnrep, _loss, _set, eltypeθ, strategy)
-                         for (_loss, _set) in zip(datafree_bc_loss_function, bcs_train_sets)]
-
-    return pde_loss_functions, bc_loss_functions
+    pde_train_sets, bcs_train_sets = train_sets
+    pde_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)),
+                            pde_train_sets)
+    bcs_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)),
+                            bcs_train_sets)
+
+    pde_loss_functions = [get_loss_function(_loss, _set, eltypeθ, strategy)
+                          for (_loss, _set) in zip(datafree_pde_loss_function,
+                                                   pde_train_sets)]
+    bc_loss_functions = [get_loss_function(_loss, _set, eltypeθ, strategy)
+                         for (_loss, _set) in zip(datafree_bc_loss_function,
+                                                   bcs_train_sets)]
+    pde_loss_functions, bc_loss_functions
 end
 
 function get_loss_function(
diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index 9461c088ea..5b47ce8fdb 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -189,159 +189,6 @@ sol1 = ahmc_bayesian_pinn_pde(pde_system,
     # @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
 end
 
-@parameters t, x, p
-@variables u(..)
-Dt = Differential(t)
-Dx = Differential(x)
-eqs = [u(t, x) * Dt(u(t, x)) - cos(p * t) ~ 0, u(t, x) + Dx(u(t, x)) ~ 0.0]
-bcs = [u(0, x) ~ 0.0, u(t, 10) ~ 1.0]
-domains = [t ∈ Interval(0.0, 2.0), x ∈ Interval(0.0, 2.0)]
-
-chainl = Lux.Chain(Lux.Dense(2, 6, tanh), Lux.Dense(6, 1))
-initl, st = Lux.setup(Random.default_rng(), chainl)
-
-@named pde_system = PDESystem(eqs,
-    bcs,
-    domains,
-    [t, x],
-    [u(t, x)],
-    [p],
-    defaults = Dict([p => 4.0]))
-
-analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
-timepoints = collect(0.0:(1 / 100.0):2.0)
-u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
-u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
-dataset = [hcat(u1, u1, timepoints)]
-
-# checking all training strategies
-# discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
-#     dataset = [dataset, nothing])
-
-discretization = BayesianPINN([chainl],
-    GridTraining([0.2, 0.2]),
-    param_estim = true, dataset = [dataset, nothing])
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 1500,
-    bcstd = [0.05, 0.05],
-    phystd = [0.01, 0.01], l2std = [0.01],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 50.0, 1 / 20.0],
-    param = [Normal(3.0, 0.5)], progress = true)
-
-param = 2 * π
-ts = vec(sol1.timepoints[1])
-u_real = [analytic_sol_func1(0.0, t) for t in ts]
-u_predict = pmean(sol1.ensemblesol[1])
-
-@test u_predict≈u_real atol=1.5
-@test mean(u_predict .- u_real) < 0.1
-@test sol1.estimated_de_params[1]≈param atol=param * 0.3
-
-# # for bc case, [bc]/bc eqs must be passed along with dataset_bc[i]
-# # and final loss for bc must be together in a vector(bcs has seperate type of dataset_bc)
-# # eqs is vector of pde eqs and dataset here is dataset_pde
-# # normally you get vector of losses
-# function get_loss_2(pinnrep, dataset, eqs)
-#     depvars = pinnrep.depvars # order is same as dataset and interps
-#     dict_depvar_input = pinnrep.dict_depvar_input
-
-#     to_subs, tobe_subs = get_symbols(dict_depvar_input, dataset, depvars)
-#     interp_subs_dict = Dict(tobe_subs[depvar] => to_subs[depvar] for depvar in depvars)
-
-#     Dict_differentials = Dict()
-#     exp = toexpr(eqs)
-#     void_value = [recur_expression(exp_i, Dict_differentials) for exp_i in exp]
-#     # Dict_differentials is now filled with Differential operator => diff_i key-value pairs
-
-    # masking operation
-    a = substitute.(eqs, Ref(Dict_differentials))
-    println(a)
-    b = substitute.(a, Ref(interp_subs_dict))
-    println(b)
-    # reverse dict for re-substituing values of Differential(t)(u(t)) etc
-    rev_Dict_differentials = Dict(value => key for (key, value) in Dict_differentials)
-    eqs = substitute.(b, Ref(rev_Dict_differentials))
-    # get losses
-    loss_functions = [NeuralPDE.build_loss_function(pinnrep,
-        eqs[i],
-        pinnrep.pde_indvars[i]) for i in eachindex(eqs)]
-end
-
-# >why not mask differential
-function get_lossy(pinnrep, dataset, eqs)
-    depvars = pinnrep.depvars # order is same as dataset and interps
-    dict_depvar_input = pinnrep.dict_depvar_input
-
-    Dict_differentials = Dict()
-    exp = toexpr(eqs)
-    for exp_i in exp
-        recur_expression(exp_i, Dict_differentials)
-    end
-    # Dict_differentials is now filled with Differential operator => diff_i key-value pairs
-
-    # masking operation
-    println("Dict_differentials : ", Dict_differentials)
-    a = substitute.(eqs, Ref(Dict_differentials))
-    println("Masked Differential term : ", a)
-
-    to_subs, tobe_subs = get_symbols(dict_depvar_input, dataset, depvars, eqs)
-    # for each row in dataset create u values for substituing in equation, n_equations=n_rows
-    eq_subs = [Dict(tobe_subs[depvar] => to_subs[depvar][i] for depvar in depvars)
-               for i in 1:size(dataset[1][:, 1])[1]]
-
-    b = []
-    for eq_sub in eq_subs
-        push!(b, [substitute(a_i, eq_sub) for a_i in a])
-    end
-
-    # reverse dict for re-substituing values of Differential(t)(u(t)) etc
-    rev_Dict_differentials = Dict(value => key for (key, value) in Dict_differentials)
-
-    c = []
-    for b_i in b
-        push!(c, substitute.(b_i, Ref(rev_Dict_differentials)))
-    end
-    println("After re Substituting depvars : ", c[1])
-    # c = vcat(c...)
-    println(c)
-    c
-    # get losses
-    # loss_functions = [NeuralPDE.build_loss_function(pinnrep,
-    #     c[i, :][j],
-    #     pinnrep.pde_indvars[j]) for j in eachindex(pinnrep.pde_indvars)]
-    # return loss_functions
-end
-
-# dataset_pde has normal matrix format 
-# dataset_bc has format of Vector{typeof(dataset_pde )} as each bc has different domain requirements
-function get_symbols(dict_depvar_input, dataset, depvars, eqs)
-    depvar_vals = [dataset_i[:, 1] for dataset_i in dataset]
-    # order of depvars
-    to_subs = Dict(pinnrep.depvars .=> depvar_vals)
-
-    asrt = Symbolics.get_variables.(eqs)
-    # want only symbols of depvars
-    temp = unique(reduce(vcat, asrt))
-    # now we have all the depvars, we now need all depvars whcih can be substituted with data interps
-
-    tobe_subs = Dict()
-    for a in depvars
-        for i in temp
-            expr = toexpr(i)
-            if (expr isa Expr) && (expr.args[1] == a)
-                tobe_subs[a] = i
-            end
-        end
-    end
-
-    return to_subs, tobe_subs
-end
-
-yuh = get_symbols(pinnrep.dict_depvar_input, dataset, pinnrep.depvars, pinnrep.eqs)
-
 function recur_expression(exp, Dict_differentials)
     for in_exp in exp.args
         if !(in_exp isa Expr)
@@ -353,95 +200,134 @@ function recur_expression(exp, Dict_differentials)
             # Dict_differentials for masking differential terms
             # and resubstituting differentials in equations after putting in interpolations
             # temp = in_exp.args[end]
-            # in_exp.args[end] = Symbolics.variable(in_exp.args[end])
-
-            Dict_differentials[in_exp] = Symbolics.variable("diff_$(length(Dict_differentials)+1)")
+            Dict_differentials[eval(in_exp)] = Symbolics.variable("diff_$(length(Dict_differentials) + 1)")
             return
         else
             recur_expression(in_exp, Dict_differentials)
         end
     end
 end
-vars = Symbolics.variable.(hcat(pinnrep.indvars, pinnrep.depvars))
-toexpr(Differential(t)(Differential(u)(u(t))) + u(t) ~ 0).args[2]
-eqs
-# Differential(t)(u(t)) - cos(p * t) ~ 0
-exprs = toexpr(eqs)
-pop = Dict()
-recur_expression(exprs, pop)
-pop1 = Dict()
-for (a, b) in pop
-    pop1[eval(a)] = b
-end
-pop1
-a = substitute(eqs, pop1)
-
-Symbolics.get_variables(eqs[1])
-# eqs=a
-
-NeuralPDE.get_variables(pinnrep.eqs, pinnrep.dict_indvars, pinnrep.dict_depvars)
-NeuralPDE.get_argument(pinnrep.bcs, pinnrep.dict_indvars, pinnrep.dict_depvars)
-dx = pinnrep.strategy.dx
-eltypeθ = eltype(pinnrep.flat_init_params)
-
-# solve dataset physics loss for heterogenous case
-# create number of equations as number of interpolation and points(n rows)
-# follow masking and finally feed training sets as set in interpolations input of u(t,x,..)
-
-# logic for recursion formula to parse differentials
-# # this below has the whole differential term
-toexpr(pde_system.eqs[1]).args[2].args[3].args[3]
-# toexpr(pde_system.eqs[1]).args[2].args[3].args[3]
-# # .args[1] isa ModelingToolkit.Differential
-
-# # logic for interpolation and indvars splatting to get Equation parsing terms
-# # splat_form = [[dataset_i[:, i] for i in 1:size(dataset_i)[2]] for dataset_i in dataset]
-# # # splat datasets onto Linear interpolations tables
-# # interps = [LinearInterpolation(splat_i...) for splat_i in splat_form]
-# # interps = Dict(depvars .=> interps)
-# # get datasets into splattable form
-# # splat_form = [[dataset_i[:, i] for i in 1:size(dataset_i)[2]] for dataset_i in dataset]
-# # # splat datasets onto Linear interpolations tables
-# # yu = [LinearInterpolation(splat_i...) for splat_i in splat_form]
-# # Symbol(:($(yu[1])))
-
-# logic to contrauct dict to feed for masking
-# Dict(interps[depvar] => dict_depvar_input[depvar] for depvar in depvars)
 
-@parameters t, p
-@variables u(..)
+@testset "Example 3: 2D Periodic System with New parameter estimation" begin
+    # Cos(pi*t) periodic curve
+    @parameters t, p
+    @variables u(..)
+
+    Dt = Differential(t)
+    eqs = Dt(u(t)) - cos(p * t) ~ 0
+    bcs = [u(0) ~ 0.0]
+    domains = [t ∈ Interval(0.0, 2.0)]
+
+    chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
+    initl, st = Lux.setup(Random.default_rng(), chainl)
+
+    @named pde_system = PDESystem(eqs,
+        bcs,
+        domains,
+        [t],
+        [u(t)],
+        [p],
+        defaults = Dict([p => 4.0]))
+
+    analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
+    timepoints = collect(0.0:(1 / 100.0):2.0)
+    u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
+    u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
+    dataset = [hcat(u1, timepoints)]
+
+    discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
+        dataset = [dataset, nothing])
+
+    # creating dictionary for masking equations
+    eqs = pde_system.eqs
+    Dict_differentials = Dict()
+    exps = toexpr.(eqs)
+    nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+    sol1 = ahmc_bayesian_pinn_pde(pde_system,
+        discretization;
+        draw_samples = 1500,
+        bcstd = [0.05],
+        phystd = [0.01], l2std = [0.01],
+        priorsNNw = (0.0, 1.0),
+        saveats = [1 / 50.0],
+        param = [LogNormal(6.0, 0.5)],
+        Dict_differentials = Dict_differentials)
+
+    param = 2 * π
+    ts = vec(sol1.timepoints[1])
+    u_real = [analytic_sol_func1(0.0, t) for t in ts]
+    u_predict = pmean(sol1.ensemblesol[1])
+
+    @test u_predict≈u_real atol=1.5
+    @test mean(u_predict .- u_real) < 0.1
+    @test sol1.estimated_de_params[1]≈param atol=param * 0.3
+end
 
-Dt = Differential(t)
-eqs = Dt(u(t)) - cos(p * t) ~ 0
-bcs = [u(0) ~ 0.0]
-domains = [t ∈ Interval(0.0, 2.0)]
+@testset "Example 4: Lorenz System with New parameter estimation" begin
+    @parameters t, σ_
+    @variables x(..), y(..), z(..)
+    Dt = Differential(t)
+    eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
+        Dt(y(t)) ~ x(t) * (28.0 - z(t)) - y(t),
+        Dt(z(t)) ~ x(t) * y(t) - 8 / 3 * z(t)]
+
+    bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
+    domains = [t ∈ Interval(0.0, 1.0)]
+
+    input_ = length(domains)
+    n = 7
+    chain = [
+        Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+            Lux.Dense(n, 1)),
+        Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+            Lux.Dense(n, 1)),
+        Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+            Lux.Dense(n, 1)),
+    ]
+
+    #Generate Data
+    function lorenz!(du, u, p, t)
+        du[1] = 10.0 * (u[2] - u[1])
+        du[2] = u[1] * (28.0 - u[3]) - u[2]
+        du[3] = u[1] * u[2] - (8 / 3) * u[3]
+    end
 
-chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
-initl, st = Lux.setup(Random.default_rng(), chainl)
+    u0 = [1.0; 0.0; 0.0]
+    tspan = (0.0, 1.0)
+    prob = ODEProblem(lorenz!, u0, tspan)
+    sol = solve(prob, Tsit5(), dt = 0.01, saveat = 0.05)
+    ts = sol.t
+    us = hcat(sol.u...)
+    us = us .+ ((0.05 .* randn(size(us))) .* us)
+    ts_ = hcat(sol(ts).t...)[1, :]
+    dataset = [hcat(us[i, :], ts_) for i in 1:3]
 
-@named pde_system = PDESystem(eqs,
-    bcs,
-    domains,
-    [t],
-    [u(t)],
-    [p],
-    defaults = Dict([p => 4.0]))
+    discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true,
+        dataset = [dataset, nothing])
 
-analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
-timepoints = collect(0.0:(1 / 100.0):2.0)
-u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
-u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
-dataset = [hcat(u1, timepoints)]
+    @named pde_system = PDESystem(eqs, bcs, domains,
+        [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]]))
 
-# checking all training strategies
-discretization = BayesianPINN([chainl], GridTraining(0.01), param_estim = true,
-    dataset = [dataset, nothing])
+    # creating dictionary for masking equations
+    eqs = pde_system.eqs
+    Dict_differentials = Dict()
+    exps = toexpr.(eqs)
+    nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+    sol1 = ahmc_bayesian_pinn_pde(pde_system,
+        discretization;
+        draw_samples = 50,
+        bcstd = [0.3, 0.3, 0.3],
+        phystd = [0.1, 0.1, 0.1],
+        l2std = [1, 1, 1],
+        priorsNNw = (0.0, 1.0),
+        saveats = [0.01],
+        param = [Normal(12.0, 2)],
+        Dict_differentials = Dict_differentials)
 
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 1500,
-    bcstd = [0.05],
-    phystd = [0.01], l2std = [0.01],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 50.0],
-    param = [LogNormal(6.0, 0.5)], progress = true)
\ No newline at end of file
+    idealp = 10.0
+    p_ = sol1.estimated_de_params[1]
+    @test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
+    # @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
+end
\ No newline at end of file

From 018d505e1d57a5e2d91ce714cb9e0fd88521cd86 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Thu, 15 Feb 2024 03:27:10 +0530
Subject: [PATCH 076/107] update BPINN_PDEinvsol_tests.jl

---
 test/BPINN_PDEinvsol_tests.jl | 327 +++++++++++++++-------------------
 1 file changed, 146 insertions(+), 181 deletions(-)

diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index 5b47ce8fdb..757b80cf0b 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -7,187 +7,152 @@ using ComponentArrays, ModelingToolkit
 
 Random.seed!(100)
 
-# Cos(pit) periodic curve (Parameter Estimation)
-println("Example 1, 2d Periodic System")
-@parameters t, p
-@variables u(..)
-
-Dt = Differential(t)
-eqs = Dt(u(t)) - cos(p * t) ~ 0
-bcs = [u(0) ~ 0.0]
-domains = [t ∈ Interval(0.0, 2.0)]
-
-chainf = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 1)) |> Flux.f64
-init1, re1 = Flux.destructure(chainf)
-chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
-initl, st = Lux.setup(Random.default_rng(), chainl)
-
-@named pde_system = PDESystem(eqs,
-    bcs,
-    domains,
-    [t],
-    [u(t)],
-    [p],
-    defaults = Dict([p => 4.0]))
-
-analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
-timepoints = collect(0.0:(1 / 100.0):2.0)
-u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
-u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
-dataset = [hcat(u1, timepoints)]
-
-# plot(dataset[1][:, 2], dataset[1][:, 1])
-# plot!(timepoints, u)
-
-# checking all training strategies
-discretization = NeuralPDE.BayesianPINN([chainl],
-    StochasticTraining(200),
-    param_estim = true, dataset = [dataset, nothing])
-
-ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 1500,
-    bcstd = [0.05],
-    phystd = [0.01], l2std = [0.01],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 50.0],
-    param = [LogNormal(6.0, 0.5)], progress = true)
-
-discretization = NeuralPDE.BayesianPINN([chainl],
-    QuasiRandomTraining(200),
-    param_estim = true, dataset = [dataset, nothing])
-
-ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 1500,
-    bcstd = [0.05],
-    phystd = [0.01], l2std = [0.01],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 50.0],
-    param = [LogNormal(6.0, 0.5)])
-
-discretization = NeuralPDE.BayesianPINN([chainl],
-    QuadratureTraining(), param_estim = true, dataset = [dataset, nothing])
-
-ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 1500,
-    bcstd = [0.05],
-    phystd = [0.01], l2std = [0.01],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 50.0],
-    param = [LogNormal(6.0, 0.5)])
-
-discretization = NeuralPDE.BayesianPINN([chainl],
-    GridTraining([0.02]),
-    param_estim = true, dataset = [dataset, nothing])
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 1500,
-    bcstd = [0.05],
-    phystd = [0.01], l2std = [0.01],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 50.0],
-    param = [LogNormal(6.0, 0.5)])
-
-discretization = NeuralPDE.BayesianPINN([chainf],
-    GridTraining([0.02]), param_estim = true, dataset = [dataset, nothing])
-
-sol2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 1500,
-    bcstd = [0.03],
-    phystd = [0.01], l2std = [0.01],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 50.0],
-    param = [LogNormal(6.0, 0.5)])
-
-param = 2 * π
-ts = vec(sol1.timepoints[1])
-u_real = [analytic_sol_func1(0.0, t) for t in ts]
-u_predict = pmean(sol1.ensemblesol[1])
-
-@test u_predict≈u_real atol=1.5
-@test mean(u_predict .- u_real) < 0.1
-@test sol1.estimated_de_params[1]≈param atol=param * 0.3
-
-ts = vec(sol2.timepoints[1])
-u_real = [analytic_sol_func1(0.0, t) for t in ts]
-u_predict = pmean(sol2.ensemblesol[1])
-
-@test u_predict≈u_real atol=0.5
-@test mean(u_predict .- u_real) < 0.1
-@test sol2.estimated_de_params[1]≈param atol=param * 0.3
-
-## Example Lorenz System (Parameter Estimation)
-println("Example 2, Lorenz System")
-@parameters t, σ_
-@variables x(..), y(..), z(..)
-Dt = Differential(t)
-eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
-    Dt(y(t)) ~ x(t) * (28.0 - z(t)) - y(t),
-    Dt(z(t)) ~ x(t) * y(t) - 8 / 3 * z(t)]
-
-bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
-domains = [t ∈ Interval(0.0, 1.0)]
-
-input_ = length(domains)
-n = 7
-chain = [
-    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-        Lux.Dense(n, 1)),
-    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-        Lux.Dense(n, 1)),
-    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-        Lux.Dense(n, 1)),
-]
-
-#Generate Data
-function lorenz!(du, u, p, t)
-    du[1] = 10.0 * (u[2] - u[1])
-    du[2] = u[1] * (28.0 - u[3]) - u[2]
-    du[3] = u[1] * u[2] - (8 / 3) * u[3]
-end
-
-u0 = [1.0; 0.0; 0.0]
-tspan = (0.0, 1.0)
-prob = ODEProblem(lorenz!, u0, tspan)
-sol = solve(prob, Tsit5(), dt = 0.01, saveat = 0.05)
-ts = sol.t
-us = hcat(sol.u...)
-us = us .+ ((0.05 .* randn(size(us))) .* us)
-ts_ = hcat(sol(ts).t...)[1, :]
-dataset = [hcat(us[i, :], ts_) for i in 1:3]
-
-# using Plots, StatsPlots
-# plot(hcat(sol.u...)[1, :], hcat(sol.u...)[2, :], hcat(sol.u...)[3, :])
-# plot!(dataset[1][:, 1], dataset[2][:, 1], dataset[3][:, 1])
-# plot(dataset[1][:, 2:end], dataset[1][:, 1])
-# plot!(dataset[2][:, 2:end], dataset[2][:, 1])
-# plot!(dataset[3][:, 2:end], dataset[3][:, 1])
-
-discretization = NeuralPDE.BayesianPINN(chain, NeuralPDE.GridTraining([0.01]);
-    param_estim = true, dataset = [dataset, nothing])
-
-@named pde_system = PDESystem(eqs, bcs, domains,
-    [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]]))
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 20,
-    bcstd = [0.3, 0.3, 0.3],
-    phystd = [0.1, 0.1, 0.1],
-    l2std = [1, 1, 1],
-    priorsNNw = (0.0, 1.0),
-    saveats = [0.01],
-    param = [Normal(12.0, 2)])
-
-    idealp = 10.0
-    p_ = sol1.estimated_de_params[1]
-    @test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
-    # @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
-end
+# @testset "Example 1: 2D Periodic System with parameter estimation" begin
+#     # Cos(pi*t) periodic curve
+#     @parameters t, p
+#     @variables u(..)
+
+#     Dt = Differential(t)
+#     eqs = Dt(u(t)) - cos(p * t) ~ 0
+#     bcs = [u(0) ~ 0.0]
+#     domains = [t ∈ Interval(0.0, 2.0)]
+
+#     chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
+#     initl, st = Lux.setup(Random.default_rng(), chainl)
+
+#     @named pde_system = PDESystem(eqs,
+#         bcs,
+#         domains,
+#         [t],
+#         [u(t)],
+#         [p],
+#         defaults = Dict([p => 4.0]))
+
+#     analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
+#     timepoints = collect(0.0:(1 / 100.0):2.0)
+#     u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
+#     u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
+#     dataset = [hcat(u1, timepoints)]
+
+#     # checking all training strategies
+#     discretization = BayesianPINN([chainl], StochasticTraining(200), param_estim = true,
+#         dataset = [dataset, nothing])
+
+#     ahmc_bayesian_pinn_pde(pde_system,
+#         discretization;
+#         draw_samples = 1500,
+#         bcstd = [0.05],
+#         phystd = [0.01], l2std = [0.01],
+#         priorsNNw = (0.0, 1.0),
+#         saveats = [1 / 50.0],
+#         param = [LogNormal(6.0, 0.5)])
+
+#     discretization = BayesianPINN([chainl], QuasiRandomTraining(200), param_estim = true,
+#         dataset = [dataset, nothing])
+
+#     ahmc_bayesian_pinn_pde(pde_system,
+#         discretization;
+#         draw_samples = 1500,
+#         bcstd = [0.05],
+#         phystd = [0.01], l2std = [0.01],
+#         priorsNNw = (0.0, 1.0),
+#         saveats = [1 / 50.0],
+#         param = [LogNormal(6.0, 0.5)])
+
+#     discretization = BayesianPINN([chainl], QuadratureTraining(), param_estim = true,
+#         dataset = [dataset, nothing])
+
+#     ahmc_bayesian_pinn_pde(pde_system,
+#         discretization;
+#         draw_samples = 1500,
+#         bcstd = [0.05],
+#         phystd = [0.01], l2std = [0.01],
+#         priorsNNw = (0.0, 1.0),
+#         saveats = [1 / 50.0],
+#         param = [LogNormal(6.0, 0.5)])
+
+#     discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
+#         dataset = [dataset, nothing])
+
+#     sol1 = ahmc_bayesian_pinn_pde(pde_system,
+#         discretization;
+#         draw_samples = 1500,
+#         bcstd = [0.05],
+#         phystd = [0.01], l2std = [0.01],
+#         priorsNNw = (0.0, 1.0),
+#         saveats = [1 / 50.0],
+#         param = [LogNormal(6.0, 0.5)])
+
+#     param = 2 * π
+#     ts = vec(sol1.timepoints[1])
+#     u_real = [analytic_sol_func1(0.0, t) for t in ts]
+#     u_predict = pmean(sol1.ensemblesol[1])
+
+#     @test u_predict≈u_real atol=1.5
+#     @test mean(u_predict .- u_real) < 0.1
+#     @test sol1.estimated_de_params[1]≈param atol=param * 0.3
+# end
+
+# @testset "Example 2: Lorenz System with parameter estimation" begin
+#     @parameters t, σ_
+#     @variables x(..), y(..), z(..)
+#     Dt = Differential(t)
+#     eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
+#         Dt(y(t)) ~ x(t) * (28.0 - z(t)) - y(t),
+#         Dt(z(t)) ~ x(t) * y(t) - 8 / 3 * z(t)]
+
+#     bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
+#     domains = [t ∈ Interval(0.0, 1.0)]
+
+#     input_ = length(domains)
+#     n = 7
+#     chain = [
+#         Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+#             Lux.Dense(n, 1)),
+#         Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+#             Lux.Dense(n, 1)),
+#         Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+#             Lux.Dense(n, 1)),
+#     ]
+
+#     #Generate Data
+#     function lorenz!(du, u, p, t)
+#         du[1] = 10.0 * (u[2] - u[1])
+#         du[2] = u[1] * (28.0 - u[3]) - u[2]
+#         du[3] = u[1] * u[2] - (8 / 3) * u[3]
+#     end
+
+#     u0 = [1.0; 0.0; 0.0]
+#     tspan = (0.0, 1.0)
+#     prob = ODEProblem(lorenz!, u0, tspan)
+#     sol = solve(prob, Tsit5(), dt = 0.01, saveat = 0.05)
+#     ts = sol.t
+#     us = hcat(sol.u...)
+#     us = us .+ ((0.05 .* randn(size(us))) .* us)
+#     ts_ = hcat(sol(ts).t...)[1, :]
+#     dataset = [hcat(us[i, :], ts_) for i in 1:3]
+
+#     discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true,
+#         dataset = [dataset, nothing])
+
+#     @named pde_system = PDESystem(eqs, bcs, domains,
+#         [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]]))
+
+#     sol1 = ahmc_bayesian_pinn_pde(pde_system,
+#         discretization;
+#         draw_samples = 50,
+#         bcstd = [0.3, 0.3, 0.3],
+#         phystd = [0.1, 0.1, 0.1],
+#         l2std = [1, 1, 1],
+#         priorsNNw = (0.0, 1.0),
+#         saveats = [0.01],
+#         param = [Normal(12.0, 2)])
+
+#     idealp = 10.0
+#     p_ = sol1.estimated_de_params[1]
+#     @test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
+#     # @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
+# end
 
 function recur_expression(exp, Dict_differentials)
     for in_exp in exp.args

From 2115ec5efb1a80b26fe33b616d01e918ef518db1 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Thu, 15 Feb 2024 10:42:52 +0530
Subject: [PATCH 077/107] spellings, newloss now optional

---
 src/PDE_BPINN.jl              |  56 ++++---
 src/collocated_estim.jl       |   2 +-
 test/BPINN_PDE_tests.jl       |   2 +-
 test/BPINN_PDEinvsol_tests.jl | 292 +++++++++++++++++-----------------
 4 files changed, 178 insertions(+), 174 deletions(-)

diff --git a/src/PDE_BPINN.jl b/src/PDE_BPINN.jl
index 6ee11fbc7c..4670af8a61 100644
--- a/src/PDE_BPINN.jl
+++ b/src/PDE_BPINN.jl
@@ -260,38 +260,42 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
         Adaptorkwargs = (Adaptor = StanHMCAdaptor,
             Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
         Integratorkwargs = (Integrator = Leapfrog,), saveats = [1 / 10.0],
-        numensemble = floor(Int, draw_samples / 3), Dict_differentials = Dict(),
+        numensemble = floor(Int, draw_samples / 3), Dict_differentials = nothing,
         progress = false, verbose = false)
     pinnrep = symbolic_discretize(pde_system, discretization)
     dataset_pde, dataset_bc = discretization.dataset
 
-    yuh1 = get_lossy(pinnrep, dataset_pde, Dict_differentials)
-    # eqs = pinnrep.bcs
-    # yuh2 = get_lossy(pinnrep, dataset_pde, eqs)
-
-    # this is a vector of tuple{vector,nothing}
-    pde_loss_functions = [merge_strategy_with_loglikelihood_function(pinnrep::PINNRepresentation,
-        GridTraining(0.1),
-        yuh1[i],
-        nothing; train_sets_pde = [data_pde[i, :] for data_pde in dataset_pde],
-        train_sets_bc = nothing)
-                          for i in eachindex(yuh1)]
-
-    function L2_loss2(θ, allstd)
-        stdpdes, stdbcs, stdextra = allstd
-        # first vector of losses,from tuple -> pde losses, first[1] pde loss
-        pde_loglikelihoods = [[logpdf(Normal(0, stdpdes[j]), pde_loss_function(θ))
-                               for (j, pde_loss_function) in enumerate(pde_loss_functions[i][1])]
-                              for i in eachindex(pde_loss_functions)]
-
-        # bc_loglikelihoods = [logpdf(Normal(0, stdbcs[j]), bc_loss_function(θ))
-        #                      for (j, bc_loss_function) in enumerate(bc_loss_functions)]
-
-        return sum(sum(pde_loglikelihoods))
-        # sum(sum(pde_loglikelihoods) + sum(bc_loglikelihoods))
+    newloss = if Dict_differentials isa Nothing
+        nothing
+    else
+        yuh1 = get_lossy(pinnrep, dataset_pde, Dict_differentials)
+        # eqs = pinnrep.bcs
+        # yuh2 = get_lossy(pinnrep, dataset_pde, eqs)
+
+        # this is a vector of tuple{vector,nothing}
+        pde_loss_functions = [merge_strategy_with_loglikelihood_function(pinnrep::PINNRepresentation,
+            GridTraining(0.1),
+            yuh1[i],
+            nothing; train_sets_pde = [data_pde[i, :] for data_pde in dataset_pde],
+            train_sets_bc = nothing)
+                              for i in eachindex(yuh1)]
+
+        function L2_loss2(θ, allstd)
+            stdpdes, stdbcs, stdextra = allstd
+            # first vector of losses,from tuple -> pde losses, first[1] pde loss
+            pde_loglikelihoods = [[logpdf(Normal(0, stdpdes[j]), pde_loss_function(θ))
+                                   for (j, pde_loss_function) in enumerate(pde_loss_functions[i][1])]
+                                  for i in eachindex(pde_loss_functions)]
+
+            # bc_loglikelihoods = [logpdf(Normal(0, stdbcs[j]), bc_loss_function(θ))
+            #                      for (j, bc_loss_function) in enumerate(bc_loss_functions)]
+
+            return sum(sum(pde_loglikelihoods))
+            # sum(sum(pde_loglikelihoods) + sum(bc_loglikelihoods))
+        end
     end
 
-    # WIP split dataset to respective equations
+    # [WIP] add overall functionality for BC dataset points
     if ((dataset_bc isa Nothing) && (dataset_pde isa Nothing))
         dataset = nothing
     elseif dataset_bc isa Nothing
diff --git a/src/collocated_estim.jl b/src/collocated_estim.jl
index b113b76f12..a2f81b3ed9 100644
--- a/src/collocated_estim.jl
+++ b/src/collocated_estim.jl
@@ -175,7 +175,7 @@ function calculate_derivatives(dataset)
     # # interp = BSplineApprox(u, t, 4, 10, :Uniform, :Uniform)
     # interp = CubicSpline(u, t)
     # interp1 = CubicSpline(u1, t)
-    # # derrivatives interpolation
+    # # derivatives interpolation
     # dx = t[2] - t[1]
     # time = collect(t[1]:dx:t[end])
     # smoothu = [interp(i) for i in time]
diff --git a/test/BPINN_PDE_tests.jl b/test/BPINN_PDE_tests.jl
index 5589039584..1388722ce4 100644
--- a/test/BPINN_PDE_tests.jl
+++ b/test/BPINN_PDE_tests.jl
@@ -222,4 +222,4 @@ end
     u_predict = pmean(sol.ensemblesol[1])
 
     @test u_predict≈u_real atol=0.8
-end
\ No newline at end of file
+end
diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index 757b80cf0b..d2b81f4305 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -7,152 +7,152 @@ using ComponentArrays, ModelingToolkit
 
 Random.seed!(100)
 
-# @testset "Example 1: 2D Periodic System with parameter estimation" begin
-#     # Cos(pi*t) periodic curve
-#     @parameters t, p
-#     @variables u(..)
-
-#     Dt = Differential(t)
-#     eqs = Dt(u(t)) - cos(p * t) ~ 0
-#     bcs = [u(0) ~ 0.0]
-#     domains = [t ∈ Interval(0.0, 2.0)]
-
-#     chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
-#     initl, st = Lux.setup(Random.default_rng(), chainl)
-
-#     @named pde_system = PDESystem(eqs,
-#         bcs,
-#         domains,
-#         [t],
-#         [u(t)],
-#         [p],
-#         defaults = Dict([p => 4.0]))
-
-#     analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
-#     timepoints = collect(0.0:(1 / 100.0):2.0)
-#     u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
-#     u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
-#     dataset = [hcat(u1, timepoints)]
-
-#     # checking all training strategies
-#     discretization = BayesianPINN([chainl], StochasticTraining(200), param_estim = true,
-#         dataset = [dataset, nothing])
-
-#     ahmc_bayesian_pinn_pde(pde_system,
-#         discretization;
-#         draw_samples = 1500,
-#         bcstd = [0.05],
-#         phystd = [0.01], l2std = [0.01],
-#         priorsNNw = (0.0, 1.0),
-#         saveats = [1 / 50.0],
-#         param = [LogNormal(6.0, 0.5)])
-
-#     discretization = BayesianPINN([chainl], QuasiRandomTraining(200), param_estim = true,
-#         dataset = [dataset, nothing])
-
-#     ahmc_bayesian_pinn_pde(pde_system,
-#         discretization;
-#         draw_samples = 1500,
-#         bcstd = [0.05],
-#         phystd = [0.01], l2std = [0.01],
-#         priorsNNw = (0.0, 1.0),
-#         saveats = [1 / 50.0],
-#         param = [LogNormal(6.0, 0.5)])
-
-#     discretization = BayesianPINN([chainl], QuadratureTraining(), param_estim = true,
-#         dataset = [dataset, nothing])
-
-#     ahmc_bayesian_pinn_pde(pde_system,
-#         discretization;
-#         draw_samples = 1500,
-#         bcstd = [0.05],
-#         phystd = [0.01], l2std = [0.01],
-#         priorsNNw = (0.0, 1.0),
-#         saveats = [1 / 50.0],
-#         param = [LogNormal(6.0, 0.5)])
-
-#     discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
-#         dataset = [dataset, nothing])
-
-#     sol1 = ahmc_bayesian_pinn_pde(pde_system,
-#         discretization;
-#         draw_samples = 1500,
-#         bcstd = [0.05],
-#         phystd = [0.01], l2std = [0.01],
-#         priorsNNw = (0.0, 1.0),
-#         saveats = [1 / 50.0],
-#         param = [LogNormal(6.0, 0.5)])
-
-#     param = 2 * π
-#     ts = vec(sol1.timepoints[1])
-#     u_real = [analytic_sol_func1(0.0, t) for t in ts]
-#     u_predict = pmean(sol1.ensemblesol[1])
-
-#     @test u_predict≈u_real atol=1.5
-#     @test mean(u_predict .- u_real) < 0.1
-#     @test sol1.estimated_de_params[1]≈param atol=param * 0.3
-# end
-
-# @testset "Example 2: Lorenz System with parameter estimation" begin
-#     @parameters t, σ_
-#     @variables x(..), y(..), z(..)
-#     Dt = Differential(t)
-#     eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
-#         Dt(y(t)) ~ x(t) * (28.0 - z(t)) - y(t),
-#         Dt(z(t)) ~ x(t) * y(t) - 8 / 3 * z(t)]
-
-#     bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
-#     domains = [t ∈ Interval(0.0, 1.0)]
-
-#     input_ = length(domains)
-#     n = 7
-#     chain = [
-#         Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-#             Lux.Dense(n, 1)),
-#         Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-#             Lux.Dense(n, 1)),
-#         Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-#             Lux.Dense(n, 1)),
-#     ]
-
-#     #Generate Data
-#     function lorenz!(du, u, p, t)
-#         du[1] = 10.0 * (u[2] - u[1])
-#         du[2] = u[1] * (28.0 - u[3]) - u[2]
-#         du[3] = u[1] * u[2] - (8 / 3) * u[3]
-#     end
-
-#     u0 = [1.0; 0.0; 0.0]
-#     tspan = (0.0, 1.0)
-#     prob = ODEProblem(lorenz!, u0, tspan)
-#     sol = solve(prob, Tsit5(), dt = 0.01, saveat = 0.05)
-#     ts = sol.t
-#     us = hcat(sol.u...)
-#     us = us .+ ((0.05 .* randn(size(us))) .* us)
-#     ts_ = hcat(sol(ts).t...)[1, :]
-#     dataset = [hcat(us[i, :], ts_) for i in 1:3]
-
-#     discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true,
-#         dataset = [dataset, nothing])
-
-#     @named pde_system = PDESystem(eqs, bcs, domains,
-#         [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]]))
-
-#     sol1 = ahmc_bayesian_pinn_pde(pde_system,
-#         discretization;
-#         draw_samples = 50,
-#         bcstd = [0.3, 0.3, 0.3],
-#         phystd = [0.1, 0.1, 0.1],
-#         l2std = [1, 1, 1],
-#         priorsNNw = (0.0, 1.0),
-#         saveats = [0.01],
-#         param = [Normal(12.0, 2)])
-
-#     idealp = 10.0
-#     p_ = sol1.estimated_de_params[1]
-#     @test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
-#     # @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
-# end
+@testset "Example 1: 2D Periodic System with parameter estimation" begin
+    # Cos(pi*t) periodic curve
+    @parameters t, p
+    @variables u(..)
+
+    Dt = Differential(t)
+    eqs = Dt(u(t)) - cos(p * t) ~ 0
+    bcs = [u(0) ~ 0.0]
+    domains = [t ∈ Interval(0.0, 2.0)]
+
+    chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
+    initl, st = Lux.setup(Random.default_rng(), chainl)
+
+    @named pde_system = PDESystem(eqs,
+        bcs,
+        domains,
+        [t],
+        [u(t)],
+        [p],
+        defaults = Dict([p => 4.0]))
+
+    analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
+    timepoints = collect(0.0:(1 / 100.0):2.0)
+    u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
+    u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
+    dataset = [hcat(u1, timepoints)]
+
+    # checking all training strategies
+    discretization = BayesianPINN([chainl], StochasticTraining(200), param_estim = true,
+        dataset = [dataset, nothing])
+
+    ahmc_bayesian_pinn_pde(pde_system,
+        discretization;
+        draw_samples = 1500,
+        bcstd = [0.05],
+        phystd = [0.01], l2std = [0.01],
+        priorsNNw = (0.0, 1.0),
+        saveats = [1 / 50.0],
+        param = [LogNormal(6.0, 0.5)])
+
+    discretization = BayesianPINN([chainl], QuasiRandomTraining(200), param_estim = true,
+        dataset = [dataset, nothing])
+
+    ahmc_bayesian_pinn_pde(pde_system,
+        discretization;
+        draw_samples = 1500,
+        bcstd = [0.05],
+        phystd = [0.01], l2std = [0.01],
+        priorsNNw = (0.0, 1.0),
+        saveats = [1 / 50.0],
+        param = [LogNormal(6.0, 0.5)])
+
+    discretization = BayesianPINN([chainl], QuadratureTraining(), param_estim = true,
+        dataset = [dataset, nothing])
+
+    ahmc_bayesian_pinn_pde(pde_system,
+        discretization;
+        draw_samples = 1500,
+        bcstd = [0.05],
+        phystd = [0.01], l2std = [0.01],
+        priorsNNw = (0.0, 1.0),
+        saveats = [1 / 50.0],
+        param = [LogNormal(6.0, 0.5)])
+
+    discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
+        dataset = [dataset, nothing])
+
+    sol1 = ahmc_bayesian_pinn_pde(pde_system,
+        discretization;
+        draw_samples = 1500,
+        bcstd = [0.05],
+        phystd = [0.01], l2std = [0.01],
+        priorsNNw = (0.0, 1.0),
+        saveats = [1 / 50.0],
+        param = [LogNormal(6.0, 0.5)])
+
+    param = 2 * π
+    ts = vec(sol1.timepoints[1])
+    u_real = [analytic_sol_func1(0.0, t) for t in ts]
+    u_predict = pmean(sol1.ensemblesol[1])
+
+    @test u_predict≈u_real atol=1.5
+    @test mean(u_predict .- u_real) < 0.1
+    @test sol1.estimated_de_params[1]≈param atol=param * 0.3
+end
+
+@testset "Example 2: Lorenz System with parameter estimation" begin
+    @parameters t, σ_
+    @variables x(..), y(..), z(..)
+    Dt = Differential(t)
+    eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
+        Dt(y(t)) ~ x(t) * (28.0 - z(t)) - y(t),
+        Dt(z(t)) ~ x(t) * y(t) - 8 / 3 * z(t)]
+
+    bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
+    domains = [t ∈ Interval(0.0, 1.0)]
+
+    input_ = length(domains)
+    n = 7
+    chain = [
+        Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+            Lux.Dense(n, 1)),
+        Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+            Lux.Dense(n, 1)),
+        Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+            Lux.Dense(n, 1)),
+    ]
+
+    #Generate Data
+    function lorenz!(du, u, p, t)
+        du[1] = 10.0 * (u[2] - u[1])
+        du[2] = u[1] * (28.0 - u[3]) - u[2]
+        du[3] = u[1] * u[2] - (8 / 3) * u[3]
+    end
+
+    u0 = [1.0; 0.0; 0.0]
+    tspan = (0.0, 1.0)
+    prob = ODEProblem(lorenz!, u0, tspan)
+    sol = solve(prob, Tsit5(), dt = 0.01, saveat = 0.05)
+    ts = sol.t
+    us = hcat(sol.u...)
+    us = us .+ ((0.05 .* randn(size(us))) .* us)
+    ts_ = hcat(sol(ts).t...)[1, :]
+    dataset = [hcat(us[i, :], ts_) for i in 1:3]
+
+    discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true,
+        dataset = [dataset, nothing])
+
+    @named pde_system = PDESystem(eqs, bcs, domains,
+        [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]]))
+
+    sol1 = ahmc_bayesian_pinn_pde(pde_system,
+        discretization;
+        draw_samples = 50,
+        bcstd = [0.3, 0.3, 0.3],
+        phystd = [0.1, 0.1, 0.1],
+        l2std = [1, 1, 1],
+        priorsNNw = (0.0, 1.0),
+        saveats = [0.01],
+        param = [Normal(12.0, 2)])
+
+    idealp = 10.0
+    p_ = sol1.estimated_de_params[1]
+    @test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
+    # @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
+end
 
 function recur_expression(exp, Dict_differentials)
     for in_exp in exp.args

From 3b77a1311c037e6bc7aa8fc3e6c48da2f6455a90 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Thu, 15 Feb 2024 15:46:01 +0530
Subject: [PATCH 078/107] removed length reweighing in BPINN ode, testset for
 recur..

---
 test/BPINN_PDEinvsol_tests.jl | 239 +++++++++++++++++-----------------
 1 file changed, 118 insertions(+), 121 deletions(-)

diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index d2b81f4305..00c3cba8f6 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -173,126 +173,123 @@ function recur_expression(exp, Dict_differentials)
     end
 end
 
-@testset "Example 3: 2D Periodic System with New parameter estimation" begin
-    # Cos(pi*t) periodic curve
-    @parameters t, p
-    @variables u(..)
-
-    Dt = Differential(t)
-    eqs = Dt(u(t)) - cos(p * t) ~ 0
-    bcs = [u(0) ~ 0.0]
-    domains = [t ∈ Interval(0.0, 2.0)]
-
-    chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
-    initl, st = Lux.setup(Random.default_rng(), chainl)
-
-    @named pde_system = PDESystem(eqs,
-        bcs,
-        domains,
-        [t],
-        [u(t)],
-        [p],
-        defaults = Dict([p => 4.0]))
-
-    analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
-    timepoints = collect(0.0:(1 / 100.0):2.0)
-    u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
-    u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
-    dataset = [hcat(u1, timepoints)]
-
-    discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
-        dataset = [dataset, nothing])
-
-    # creating dictionary for masking equations
-    eqs = pde_system.eqs
-    Dict_differentials = Dict()
-    exps = toexpr.(eqs)
-    nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-    sol1 = ahmc_bayesian_pinn_pde(pde_system,
-        discretization;
-        draw_samples = 1500,
-        bcstd = [0.05],
-        phystd = [0.01], l2std = [0.01],
-        priorsNNw = (0.0, 1.0),
-        saveats = [1 / 50.0],
-        param = [LogNormal(6.0, 0.5)],
-        Dict_differentials = Dict_differentials)
-
-    param = 2 * π
-    ts = vec(sol1.timepoints[1])
-    u_real = [analytic_sol_func1(0.0, t) for t in ts]
-    u_predict = pmean(sol1.ensemblesol[1])
-
-    @test u_predict≈u_real atol=1.5
-    @test mean(u_predict .- u_real) < 0.1
-    @test sol1.estimated_de_params[1]≈param atol=param * 0.3
+println("Example 3: 2D Periodic System with New parameter estimation")
+@parameters t, p
+@variables u(..)
+
+Dt = Differential(t)
+eqs = Dt(u(t)) - cos(p * t) ~ 0
+bcs = [u(0) ~ 0.0]
+domains = [t ∈ Interval(0.0, 2.0)]
+
+chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
+initl, st = Lux.setup(Random.default_rng(), chainl)
+
+@named pde_system = PDESystem(eqs,
+    bcs,
+    domains,
+    [t],
+    [u(t)],
+    [p],
+    defaults = Dict([p => 4.0]))
+
+analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
+timepoints = collect(0.0:(1 / 100.0):2.0)
+u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
+u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
+dataset = [hcat(u1, timepoints)]
+
+discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
+    dataset = [dataset, nothing])
+
+# creating dictionary for masking equations
+eqs = pde_system.eqs
+Dict_differentials = Dict()
+exps = toexpr.(eqs)
+nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 1500,
+    bcstd = [0.05],
+    phystd = [0.01], l2std = [0.01],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 50.0],
+    param = [LogNormal(6.0, 0.5)],
+    Dict_differentials = Dict_differentials)
+
+param = 2 * π
+ts = vec(sol1.timepoints[1])
+u_real = [analytic_sol_func1(0.0, t) for t in ts]
+u_predict = pmean(sol1.ensemblesol[1])
+
+@test u_predict≈u_real atol=1.5
+@test mean(u_predict .- u_real) < 0.1
+@test sol1.estimated_de_params[1]≈param atol=param * 0.3
+
+println("Example 4: Lorenz System with New parameter estimation")
+@parameters t, σ_
+@variables x(..), y(..), z(..)
+Dt = Differential(t)
+eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
+    Dt(y(t)) ~ x(t) * (28.0 - z(t)) - y(t),
+    Dt(z(t)) ~ x(t) * y(t) - 8 / 3 * z(t)]
+
+bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
+domains = [t ∈ Interval(0.0, 1.0)]
+
+input_ = length(domains)
+n = 7
+chain = [
+    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+        Lux.Dense(n, 1)),
+    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+        Lux.Dense(n, 1)),
+    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+        Lux.Dense(n, 1)),
+]
+
+#Generate Data
+function lorenz!(du, u, p, t)
+    du[1] = 10.0 * (u[2] - u[1])
+    du[2] = u[1] * (28.0 - u[3]) - u[2]
+    du[3] = u[1] * u[2] - (8 / 3) * u[3]
 end
 
-@testset "Example 4: Lorenz System with New parameter estimation" begin
-    @parameters t, σ_
-    @variables x(..), y(..), z(..)
-    Dt = Differential(t)
-    eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
-        Dt(y(t)) ~ x(t) * (28.0 - z(t)) - y(t),
-        Dt(z(t)) ~ x(t) * y(t) - 8 / 3 * z(t)]
-
-    bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
-    domains = [t ∈ Interval(0.0, 1.0)]
-
-    input_ = length(domains)
-    n = 7
-    chain = [
-        Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-            Lux.Dense(n, 1)),
-        Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-            Lux.Dense(n, 1)),
-        Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-            Lux.Dense(n, 1)),
-    ]
-
-    #Generate Data
-    function lorenz!(du, u, p, t)
-        du[1] = 10.0 * (u[2] - u[1])
-        du[2] = u[1] * (28.0 - u[3]) - u[2]
-        du[3] = u[1] * u[2] - (8 / 3) * u[3]
-    end
-
-    u0 = [1.0; 0.0; 0.0]
-    tspan = (0.0, 1.0)
-    prob = ODEProblem(lorenz!, u0, tspan)
-    sol = solve(prob, Tsit5(), dt = 0.01, saveat = 0.05)
-    ts = sol.t
-    us = hcat(sol.u...)
-    us = us .+ ((0.05 .* randn(size(us))) .* us)
-    ts_ = hcat(sol(ts).t...)[1, :]
-    dataset = [hcat(us[i, :], ts_) for i in 1:3]
-
-    discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true,
-        dataset = [dataset, nothing])
-
-    @named pde_system = PDESystem(eqs, bcs, domains,
-        [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]]))
-
-    # creating dictionary for masking equations
-    eqs = pde_system.eqs
-    Dict_differentials = Dict()
-    exps = toexpr.(eqs)
-    nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-    sol1 = ahmc_bayesian_pinn_pde(pde_system,
-        discretization;
-        draw_samples = 50,
-        bcstd = [0.3, 0.3, 0.3],
-        phystd = [0.1, 0.1, 0.1],
-        l2std = [1, 1, 1],
-        priorsNNw = (0.0, 1.0),
-        saveats = [0.01],
-        param = [Normal(12.0, 2)],
-        Dict_differentials = Dict_differentials)
-
-    idealp = 10.0
-    p_ = sol1.estimated_de_params[1]
-    @test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
-    # @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
-end
\ No newline at end of file
+u0 = [1.0; 0.0; 0.0]
+tspan = (0.0, 1.0)
+prob = ODEProblem(lorenz!, u0, tspan)
+sol = solve(prob, Tsit5(), dt = 0.01, saveat = 0.05)
+ts = sol.t
+us = hcat(sol.u...)
+us = us .+ ((0.05 .* randn(size(us))) .* us)
+ts_ = hcat(sol(ts).t...)[1, :]
+dataset = [hcat(us[i, :], ts_) for i in 1:3]
+
+discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true,
+    dataset = [dataset, nothing])
+
+@named pde_system = PDESystem(eqs, bcs, domains,
+    [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]]))
+
+# creating dictionary for masking equations
+eqs = pde_system.eqs
+Dict_differentials = Dict()
+exps = toexpr.(eqs)
+nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 50,
+    bcstd = [0.3, 0.3, 0.3],
+    phystd = [0.1, 0.1, 0.1],
+    l2std = [1, 1, 1],
+    priorsNNw = (0.0, 1.0),
+    saveats = [0.01],
+    param = [Normal(12.0, 2)],
+    Dict_differentials = Dict_differentials)
+
+idealp = 10.0
+p_ = sol1.estimated_de_params[1]
+@test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
+# @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]

From 8037fa0fc2521d1ac94325a5ed288c4324f21311 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Sat, 17 Feb 2024 01:47:58 +0530
Subject: [PATCH 079/107] corrected tests, datasetnew format

---
 src/PDE_BPINN.jl    |  34 ++++--
 src/discretize.jl   |   2 +-
 test/BPINN_Tests.jl | 245 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 271 insertions(+), 10 deletions(-)
 create mode 100644 test/BPINN_Tests.jl

diff --git a/src/PDE_BPINN.jl b/src/PDE_BPINN.jl
index 4670af8a61..2df131f6e4 100644
--- a/src/PDE_BPINN.jl
+++ b/src/PDE_BPINN.jl
@@ -255,8 +255,10 @@ end
     releases.
 """
 function ahmc_bayesian_pinn_pde(pde_system, discretization;
-        draw_samples = 1000, bcstd = [0.01], l2std = [0.05], phystd = [0.05],
-        priorsNNw = (0.0, 2.0), param = [], nchains = 1, Kernel = HMC(0.1, 30),
+        draw_samples = 1000,
+        bcstd = [0.01], l2std = [0.05],
+        phystd = [0.05], phystdnew = [0.05], priorsNNw = (0.0, 2.0),
+        param = [], nchains = 1, Kernel = HMC(0.1, 30),
         Adaptorkwargs = (Adaptor = StanHMCAdaptor,
             Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
         Integratorkwargs = (Integrator = Leapfrog,), saveats = [1 / 10.0],
@@ -272,18 +274,24 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
         # eqs = pinnrep.bcs
         # yuh2 = get_lossy(pinnrep, dataset_pde, eqs)
 
+        # consider all dataset domain points and for each row new set of equation loss function
         # this is a vector of tuple{vector,nothing}
         pde_loss_functions = [merge_strategy_with_loglikelihood_function(pinnrep::PINNRepresentation,
             GridTraining(0.1),
             yuh1[i],
-            nothing; train_sets_pde = [data_pde[i, :] for data_pde in dataset_pde],
+            nothing;
+            # pass transformation of each dataset row-corresponds to each point, for each depvar dataset point merged equation vector
+            train_sets_pde = get_dataset_train_points(pde_system.eqs,
+                [Array(data[i, :]') for data in dataset_pde],
+                pinnrep),
             train_sets_bc = nothing)
                               for i in eachindex(yuh1)]
 
         function L2_loss2(θ, allstd)
-            stdpdes, stdbcs, stdextra = allstd
+            stdpdesnew = allstd[4]
+
             # first vector of losses,from tuple -> pde losses, first[1] pde loss
-            pde_loglikelihoods = [[logpdf(Normal(0, stdpdes[j]), pde_loss_function(θ))
+            pde_loglikelihoods = [[logpdf(Normal(0, stdpdesnew[j]), pde_loss_function(θ))
                                    for (j, pde_loss_function) in enumerate(pde_loss_functions[i][1])]
                                   for i in eachindex(pde_loss_functions)]
 
@@ -357,10 +365,18 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
     # vector in case of N-dimensional domains
     strategy = discretization.strategy
 
-    # dimensions would be total no of params,initial_nnθ for Lux namedTuples
-    ℓπ = PDELogTargetDensity(
-        nparameters, strategy, dataset, priors, [phystd, bcstd, l2std],
-        names, ninv, initial_nnθ, full_weighted_loglikelihood, Φ)
+    # dimensions would be total no of params,initial_nnθ for Lux namedTuples 
+    ℓπ = PDELogTargetDensity(nparameters,
+        strategy,
+        dataset,
+        priors,
+        [phystd, bcstd, l2std, phystdnew],
+        names,
+        ninv,
+        initial_nnθ,
+        full_weighted_loglikelihood,
+        newloss,
+        Φ)
 
     Adaptor, Metric, targetacceptancerate = Adaptorkwargs[:Adaptor],
     Adaptorkwargs[:Metric], Adaptorkwargs[:targetacceptancerate]
diff --git a/src/discretize.jl b/src/discretize.jl
index bed027aa2f..b49319742b 100644
--- a/src/discretize.jl
+++ b/src/discretize.jl
@@ -539,7 +539,7 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem, discretization::Ab
         end
 
         function full_loss_function(θ, allstd::Vector{Vector{Float64}})
-            stdpdes, stdbcs, stdextra = allstd
+            stdpdes, stdbcs, stdextra, stdpdesnew = allstd
             # the aggregation happens on cpu even if the losses are gpu, probably fine since it's only a few of them
             pde_loglikelihoods = [logpdf(Normal(0, stdpdes[i]), pde_loss_function(θ))
                                   for (i, pde_loss_function) in enumerate(pde_loss_functions)]
diff --git a/test/BPINN_Tests.jl b/test/BPINN_Tests.jl
new file mode 100644
index 0000000000..2fe347b3b4
--- /dev/null
+++ b/test/BPINN_Tests.jl
@@ -0,0 +1,245 @@
+# # Testing Code
+using Test, MCMCChains
+using ForwardDiff, Distributions, OrdinaryDiffEq
+using OptimizationOptimisers, AdvancedHMC, Lux
+using Statistics, Random, Functors, ComponentArrays
+using NeuralPDE, MonteCarloMeasurements
+using Flux
+
+# note that current testing bounds can be easily further tightened but have been inflated for support for Julia build v1
+# on latest Julia version it performs much better for below tests
+Random.seed!(100)
+
+@testset "Example 1 - without parameter estimation" begin
+    linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π)
+    linear = (u, p, t) -> cos(2 * π * t)
+    tspan = (0.0, 2.0)
+    u0 = 0.0
+    prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan)
+    p = prob.p
+
+    # Numerical and Analytical Solutions: testing ahmc_bayesian_pinn_ode()
+    ta = range(tspan[1], tspan[2], length = 300)
+    u = [linear_analytic(u0, nothing, ti) for ti in ta]
+    x̂ = collect(Float64, Array(u) + 0.02 * randn(size(u)))
+    time = vec(collect(Float64, ta))
+    physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+    # testing points for solve() call must match saveat(1/50.0) arg
+    ta0 = range(tspan[1], tspan[2], length = 101)
+    u1 = [linear_analytic(u0, nothing, ti) for ti in ta0]
+    x̂1 = collect(Float64, Array(u1) + 0.02 * randn(size(u1)))
+    time1 = vec(collect(Float64, ta0))
+    physsol0_1 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
+
+    chainlux = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1))
+    θinit, st = Lux.setup(Random.default_rng(), chainlux)
+
+    fh_mcmc_chain, fhsamples, fhstats = ahmc_bayesian_pinn_ode(prob, chainlux, draw_samples = 2500)
+
+    alg = BNNODE(chainlux, draw_samples = 2500)
+    sol1lux = solve(prob, alg)
+
+    # testing points
+    t = time
+    # Mean of last 500 sampled parameter's curves[Ensemble predictions]
+    θ = [vector_to_parameters(fhsamples[i], θinit) for i in 2000:length(fhsamples)]
+    luxar = [chainlux(t', θ[i], st)[1] for i in eachindex(θ)]
+    luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
+    meanscurve = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
+
+    # --------------------- ahmc_bayesian_pinn_ode() call
+    @test mean(abs.(x̂ .- meanscurve)) < 0.05
+    @test mean(abs.(physsol1 .- meanscurve)) < 0.005
+
+    #--------------------- solve() call 
+    @test mean(abs.(x̂1 .- sol1lux.ensemblesol[1])) < 0.05
+    @test mean(abs.(physsol0_1 .- sol1lux.ensemblesol[1])) < 0.05
+end
+
+@testset "Example 2 - with parameter estimation" begin
+    linear_analytic = (u0, p, t) -> u0 + sin(p * t) / (p)
+    linear = (u, p, t) -> cos(p * t)
+    tspan = (0.0, 2.0)
+    u0 = 0.0
+    p = 2 * pi
+    prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan, p)
+
+    # Numerical and Analytical Solutions
+    sol1 = solve(prob, Tsit5(); saveat = 0.01)
+    u = sol1.u
+    time = sol1.t
+
+    # BPINN AND TRAINING DATASET CREATION(dataset must be defined only inside problem timespan!)
+    ta = range(tspan[1], tspan[2], length = 100)
+    u = [linear_analytic(u0, p, ti) for ti in ta]
+    x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u)))
+    time = vec(collect(Float64, ta))
+    dataset = [x̂, time]
+    physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+    # testing points for solve call(saveat=1/50.0 ∴ at t = collect(eltype(saveat), prob.tspan[1]:saveat:prob.tspan[2] internally estimates)
+    ta0 = range(tspan[1], tspan[2], length = 101)
+    u1 = [linear_analytic(u0, p, ti) for ti in ta0]
+    x̂1 = collect(Float64, Array(u1) + 0.2 * randn(size(u1)))
+    time1 = vec(collect(Float64, ta0))
+    physsol1_1 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
+
+    chainlux1 = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1))
+    θinit, st = Lux.setup(Random.default_rng(), chainlux1)
+
+    fh_mcmc_chain, fhsamples, fhstats = ahmc_bayesian_pinn_ode(prob, chainlux1,
+        dataset = dataset,
+        draw_samples = 2500,
+        physdt = 1 / 50.0,
+        priorsNNw = (0.0, 3.0),
+        param = [LogNormal(9, 0.5)])
+
+    alg = BNNODE(chainlux1, dataset = dataset,
+        draw_samples = 2500,
+        physdt = 1 / 50.0,
+        priorsNNw = (0.0,
+            3.0),
+        param = [
+            LogNormal(9,
+                0.5),
+        ])
+
+    sol2lux = solve(prob, alg)
+
+    # testing points
+    t = time
+    # Mean of last 500 sampled parameter's curves(flux and lux chains)[Ensemble predictions]
+    θ = [vector_to_parameters(fhsamples[i][1:(end - 1)], θinit) for i in 2000:length(fhsamples)]
+    luxar = [chainlux1(t', θ[i], st)[1] for i in eachindex(θ)]
+    luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
+    meanscurve = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
+
+    # --------------------- ahmc_bayesian_pinn_ode() call  
+    @test mean(abs.(physsol1 .- meanscurve)) < 0.15
+
+    # ESTIMATED ODE PARAMETERS (NN1 AND NN2)
+    @test abs(p - mean([fhsamples[i][23] for i in 2000:length(fhsamples)])) < abs(0.35 * p)
+
+    #-------------------------- solve() call  
+    @test mean(abs.(physsol1_1 .- sol2lux.ensemblesol[1])) < 8e-2
+
+    # ESTIMATED ODE PARAMETERS (NN1 AND NN2)
+    @test abs(p - sol2lux.estimated_de_params[1]) < abs(0.15 * p)
+end
+
+@testset "Example 3" begin
+    linear = (u, p, t) -> u / p + exp(t / p) * cos(t)
+    tspan = (0.0, 10.0)
+    u0 = 0.0
+    p = -5.0
+    prob = ODEProblem(linear, u0, tspan, p)
+    linear_analytic = (u0, p, t) -> exp(t / p) * (u0 + sin(t))
+
+    # SOLUTION AND CREATE DATASET
+    sol = solve(prob, Tsit5(); saveat = 0.1)
+    u = sol.u
+    time = sol.t
+    x̂ = u .+ (u .* 0.2) .* randn(size(u))
+    dataset = [x̂, time]
+    t = sol.t
+    physsol1 = [linear_analytic(prob.u0, p, t[i]) for i in eachindex(t)]
+
+    ta0 = range(tspan[1], tspan[2], length = 501)
+    u1 = [linear_analytic(u0, p, ti) for ti in ta0]
+    time1 = vec(collect(Float64, ta0))
+    physsol2 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
+
+    chainlux12 = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh), Lux.Dense(6, 1))
+    θinit, st = Lux.setup(Random.default_rng(), chainlux12)
+
+    fh_mcmc_chainlux12, fhsampleslux12, fhstatslux12 = ahmc_bayesian_pinn_ode(prob, chainlux12,
+        draw_samples = 1500,
+        l2std = [0.03],
+        phystd = [0.03],
+        priorsNNw = (0.0,
+            10.0))
+
+    fh_mcmc_chainlux22, fhsampleslux22, fhstatslux22 = ahmc_bayesian_pinn_ode(prob, chainlux12,
+        dataset = dataset,
+        draw_samples = 1500,
+        l2std = [0.03],
+        phystd = [0.03],
+        priorsNNw = (0.0,
+            10.0),
+        param = [
+            Normal(-7,
+                4),
+        ])
+
+    alg = BNNODE(chainlux12,
+        dataset = dataset,
+        draw_samples = 1500,
+        l2std = [0.03],
+        phystd = [0.03],
+        priorsNNw = (0.0,
+            10.0),
+        param = [
+            Normal(-7,
+                4),
+        ])
+
+    sol3lux_pestim = solve(prob, alg)
+
+    # testing timepoints
+    t = sol.t
+    #------------------------------ ahmc_bayesian_pinn_ode() call
+    # Mean of last 500 sampled parameter's curves(lux chains)[Ensemble predictions]
+    θ = [vector_to_parameters(fhsampleslux12[i], θinit) for i in 1000:length(fhsampleslux12)]
+    luxar = [chainlux12(t', θ[i], st)[1] for i in eachindex(θ)]
+    luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
+    meanscurve2_1 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
+
+    θ = [vector_to_parameters(fhsampleslux22[i][1:(end - 1)], θinit) for i in 1000:length(fhsampleslux22)]
+    luxar = [chainlux12(t', θ[i], st)[1] for i in eachindex(θ)]
+    luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
+    meanscurve2_2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
+
+    @test mean(abs.(sol.u .- meanscurve2_1)) < 1e-1
+    @test mean(abs.(physsol1 .- meanscurve2_1)) < 1e-1
+    @test mean(abs.(sol.u .- meanscurve2_2)) < 5e-2
+    @test mean(abs.(physsol1 .- meanscurve2_2)) < 5e-2
+
+    # estimated parameters(lux chain)
+    param1 = mean(i[62] for i in fhsampleslux22[1000:length(fhsampleslux22)])
+    @test abs(param1 - p) < abs(0.3 * p)
+
+    #-------------------------- solve() call 
+    # (lux chain)
+    @test mean(abs.(physsol2 .- sol3lux_pestim.ensemblesol[1])) < 0.15
+    # estimated parameters(lux chain)
+    param1 = sol3lux_pestim.estimated_de_params[1]
+    @test abs(param1 - p) < abs(0.45 * p)
+end
+
+@testset "Translating from Flux" begin
+    linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π)
+    linear = (u, p, t) -> cos(2 * π * t)
+    tspan = (0.0, 2.0)
+    u0 = 0.0
+    prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan)
+    p = prob.p
+
+    # Numerical and Analytical Solutions: testing ahmc_bayesian_pinn_ode()
+    ta = range(tspan[1], tspan[2], length = 300)
+    u = [linear_analytic(u0, nothing, ti) for ti in ta]
+    x̂ = collect(Float64, Array(u) + 0.02 * randn(size(u)))
+    time = vec(collect(Float64, ta))
+    physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+    # testing points for solve() call must match saveat(1/50.0) arg
+    ta0 = range(tspan[1], tspan[2], length = 101)
+    u1 = [linear_analytic(u0, nothing, ti) for ti in ta0]
+    x̂1 = collect(Float64, Array(u1) + 0.02 * randn(size(u1)))
+    time1 = vec(collect(Float64, ta0))
+    physsol0_1 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
+    chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64
+    fh_mcmc_chain, fhsamples, fhstats = ahmc_bayesian_pinn_ode(prob, chainflux, draw_samples = 2500)
+    alg = BNNODE(chainflux, draw_samples = 2500)
+    @test alg.chain isa Lux.AbstractExplicitLayer
+end

From 2f0e1f5062ca8d8e640d9077b362da0cbdc9c86e Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Thu, 22 Feb 2024 00:32:26 +0530
Subject: [PATCH 080/107] changes from reviews

---
 src/PDE_BPINN.jl              |   1 -
 src/training_strategies.jl    |  18 +-
 test/BPINN_PDEinvsol_tests.jl | 303 ++++++++++++++++++++++++++++------
 3 files changed, 255 insertions(+), 67 deletions(-)

diff --git a/src/PDE_BPINN.jl b/src/PDE_BPINN.jl
index 2df131f6e4..e940c8c5e1 100644
--- a/src/PDE_BPINN.jl
+++ b/src/PDE_BPINN.jl
@@ -395,7 +395,6 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
 
     # parallel sampling option
     if nchains != 1
-
         # Cache to store the chains
         bpinnsols = Vector{Any}(undef, nchains)
 
diff --git a/src/training_strategies.jl b/src/training_strategies.jl
index 9c78d9979a..e008d131aa 100644
--- a/src/training_strategies.jl
+++ b/src/training_strategies.jl
@@ -25,20 +25,10 @@ function get_dataset_train_points(eqs, train_sets, pinnrep)
     eq_args = NeuralPDE.get_argument(eqs, dict_indvars, dict_depvars)
     # [[:t]]
 
-    points = []
-    for eq_arg in eq_args
-        a = []
-        # for each (depvar,[indvar1..]) if indvari==indvar (eq_arg)
-        for i in eachindex(symbols_input)
-            if symbols_input[i][2] == eq_arg
-                # include domain points of that depvar
-                # each loss equation take domain matrix [points..;points..]
-                push!(a, train_sets[i][:, 2:end]')
-            end
-        end
-        # vcat as new row for next equation
-        push!(points, vcat(a...))
-    end
+    points = [vcat([train_sets[i][:, 2:end]'
+                    for i in eachindex(symbols_input) if symbols_input[i][2] == eq_arg]...)
+              for eq_arg in eq_args]
+
     return points
 end
 
diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index 00c3cba8f6..a285dcb443 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -216,7 +216,7 @@ sol1 = ahmc_bayesian_pinn_pde(pde_system,
     priorsNNw = (0.0, 1.0),
     saveats = [1 / 50.0],
     param = [LogNormal(6.0, 0.5)],
-    Dict_differentials = Dict_differentials)
+    Dict_differentials = Dict_differentials, progress = true)
 
 param = 2 * π
 ts = vec(sol1.timepoints[1])
@@ -227,69 +227,268 @@ u_predict = pmean(sol1.ensemblesol[1])
 @test mean(u_predict .- u_real) < 0.1
 @test sol1.estimated_de_params[1]≈param atol=param * 0.3
 
-println("Example 4: Lorenz System with New parameter estimation")
-@parameters t, σ_
-@variables x(..), y(..), z(..)
+println("Example 3: Lotka Volterra with New parameter estimation")
+@parameters t α β γ δ
+@variables x(..) y(..)
+
 Dt = Differential(t)
-eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
-    Dt(y(t)) ~ x(t) * (28.0 - z(t)) - y(t),
-    Dt(z(t)) ~ x(t) * y(t) - 8 / 3 * z(t)]
-
-bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
-domains = [t ∈ Interval(0.0, 1.0)]
-
-input_ = length(domains)
-n = 7
-chain = [
-    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-        Lux.Dense(n, 1)),
-    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-        Lux.Dense(n, 1)),
-    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-        Lux.Dense(n, 1)),
+eqs = [Dt(x(t)) ~ α * x(t) - β * x(t) * y(t), Dt(y(t)) ~ -γ * y(t) + δ * x(t) * y(t)]
+bcs = [x(0) ~ 1.0, y(0) ~ 1.0]
+domains = [t ∈ Interval(0.0, 4.0)]
+
+# Define the parameters' values
+# params = [α => 1.0, β => 0.5, γ => 0.5, δ => 1.0]
+# p = [1.5, 1.0, 3.0, 1.0]
+
+chainl = [
+    Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),
+        Lux.Dense(6, 1)),
+    Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),
+        Lux.Dense(6, 1)),
 ]
 
-#Generate Data
-function lorenz!(du, u, p, t)
-    du[1] = 10.0 * (u[2] - u[1])
-    du[2] = u[1] * (28.0 - u[3]) - u[2]
-    du[3] = u[1] * u[2] - (8 / 3) * u[3]
+initl, st = Lux.setup(Random.default_rng(), chainl[1])
+initl1, st1 = Lux.setup(Random.default_rng(), chainl[2])
+
+@named pde_system = PDESystem(eqs,
+    bcs,
+    domains,
+    [t],
+    [x(t), y(t)],
+    [α, β, γ, δ],
+    defaults = Dict([α => 1.0, β => 0.5, γ => 0.5, δ => 1.0]))
+
+using NeuralPDE, Lux, Plots, OrdinaryDiffEq, Distributions, Random
+
+function lotka_volterra(u, p, t)
+    α, β, γ, δ = p
+    x, y = u
+    dx = (α - β * y) * x
+    dy = (δ * x - γ) * y
+    return [dx, dy]
 end
 
-u0 = [1.0; 0.0; 0.0]
-tspan = (0.0, 1.0)
-prob = ODEProblem(lorenz!, u0, tspan)
-sol = solve(prob, Tsit5(), dt = 0.01, saveat = 0.05)
-ts = sol.t
-us = hcat(sol.u...)
-us = us .+ ((0.05 .* randn(size(us))) .* us)
-ts_ = hcat(sol(ts).t...)[1, :]
-dataset = [hcat(us[i, :], ts_) for i in 1:3]
-
-discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true,
+# initial-value problem.
+u0 = [1.0, 1.0]
+p = [1.5, 1.0, 3.0, 1.0]
+tspan = (0.0, 4.0)
+prob = ODEProblem(lotka_volterra, u0, tspan, p)
+
+# Solve using OrdinaryDiffEq.jl solver
+dt = 0.05
+solution = solve(prob, Tsit5(); saveat = dt)
+
+# Extract solution
+time = solution.t
+u = hcat(solution.u...)
+# plot(time, u[1, :])
+# plot!(time, u[2, :])
+# Construct dataset
+dataset = [hcat(u[i, :], time) for i in 1:2]
+
+discretization = BayesianPINN(chainl, GridTraining(0.01), param_estim = true,
     dataset = [dataset, nothing])
 
-@named pde_system = PDESystem(eqs, bcs, domains,
-    [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]]))
-
 # creating dictionary for masking equations
 eqs = pde_system.eqs
 Dict_differentials = Dict()
 exps = toexpr.(eqs)
 nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
 
+sol = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 500,
+    bcstd = [0.05, 0.05],
+    phystd = [0.005, 0.005], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 10.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(1.0, 2),
+        Normal(1.0, 2),
+        Normal(1.0, 2),
+        Normal(1.0, 2),
+    ], progress = true)
+
+# plot!(sol.timepoints[1]', sol.ensemblesol[1])
+# plot!(sol.timepoints[2]', sol.ensemblesol[2])
+
 sol1 = ahmc_bayesian_pinn_pde(pde_system,
     discretization;
-    draw_samples = 50,
-    bcstd = [0.3, 0.3, 0.3],
-    phystd = [0.1, 0.1, 0.1],
-    l2std = [1, 1, 1],
-    priorsNNw = (0.0, 1.0),
-    saveats = [0.01],
-    param = [Normal(12.0, 2)],
-    Dict_differentials = Dict_differentials)
-
-idealp = 10.0
-p_ = sol1.estimated_de_params[1]
-@test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
-# @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
+    draw_samples = 500,
+    bcstd = [0.05, 0.05],
+    phystd = [0.005, 0.005], l2std = [0.1, 0.1],
+    phystdnew = [0.1, 0.1],
+    #  Kernel = AdvancedHMC.NUTS(0.8),
+    priorsNNw = (0.0, 10.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1.0, 2),
+        Normal(1.0, 2),
+        Normal(1.0, 2),
+        Normal(1.0, 2),
+    ],
+    Dict_differentials = Dict_differentials, progress = true)
+
+# plot!(sol1.timepoints[1]', sol1.ensemblesol[1])
+# plot!(sol1.timepoints[2]', sol1.ensemblesol[2])
+
+param = 2 * π
+ts = vec(sol1.timepoints[1])
+u_real = [analytic_sol_func1(0.0, t) for t in ts]
+u_predict = pmean(sol1.ensemblesol[1])
+
+@test u_predict≈u_real atol=1.5
+@test mean(u_predict .- u_real) < 0.1
+@test sol1.estimated_de_params[1]≈param atol=param * 0.3
+
+# points1 = []
+# for eq_arg in eq_args
+#     a = []
+#     # for each (depvar,[indvar1..]) if indvari==indvar (eq_arg)
+#     for i in eachindex(symbols_input)
+#         if symbols_input[i][2] == eq_arg
+#             # include domain points of that depvar
+#             # each loss equation take domain matrix [points..;points..]
+#             push!(a, train_sets[i][:, 2:end]')
+#         end
+#     end
+#     # vcat as new row for next equation
+#     push!(points1, vcat(a...))
+# end
+# println(points1 == points)
+
+using NeuralPDE, Flux, Lux, ModelingToolkit, LinearAlgebra, AdvancedHMC
+import ModelingToolkit: Interval, infimum, supremum, Distributions
+using Plots, MonteCarloMeasurements
+
+@parameters x, t, α
+@variables u(..)
+Dt = Differential(t)
+Dx = Differential(x)
+Dx2 = Differential(x)^2
+Dx3 = Differential(x)^3
+Dx4 = Differential(x)^4
+
+# α = 1
+β = 4
+γ = 1
+eq = Dt(u(x, t)) + u(x, t) * Dx(u(x, t)) + α * Dx2(u(x, t)) + β * Dx3(u(x, t)) + γ * Dx4(u(x, t)) ~ 0
+
+u_analytic(x, t; z = -x / 2 + t) = 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
+du(x, t; z = -x / 2 + t) = 15 / 2 * (tanh(z) + 1) * (3 * tanh(z) - 1) * sech(z)^2
+
+bcs = [u(x, 0) ~ u_analytic(x, 0),
+    u(-10, t) ~ u_analytic(-10, t),
+    u(10, t) ~ u_analytic(10, t),
+    Dx(u(-10, t)) ~ du(-10, t),
+    Dx(u(10, t)) ~ du(10, t)]
+
+# Space and time domains
+domains = [x ∈ Interval(-10.0, 10.0),
+    t ∈ Interval(0.0, 1.0)]
+
+# Discretization
+dx = 0.4;
+dt = 0.2;
+
+# Function to compute analytical solution at a specific point (x, t)
+function u_analytic_point(x, t)
+    z = -x / 2 + t
+    return 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
+end
+
+# Function to generate the dataset matrix
+function generate_dataset_matrix(domains, dx, dt)
+    x_values = -10:dx:10
+    t_values = 0.0:dt:1.0
+
+    dataset = []
+
+    for t in t_values
+        for x in x_values
+            u_value = u_analytic_point(x, t)
+            push!(dataset, [u_value, x, t])
+        end
+    end
+
+    return vcat([data' for data in dataset]...)
+end
+
+datasetpde = [generate_dataset_matrix(domains, dx, dt)]
+
+# noise to dataset
+noisydataset = deepcopy(datasetpde)
+noisydataset[1][:, 1] = noisydataset[1][:, 1] .+
+                        randn(size(noisydataset[1][:, 1])) .* 5 / 100 .*
+                        noisydataset[1][:, 1]
+
+# plot(datasetpde[1][:, 2], datasetpde[1][:, 1], title = "Dataset from Analytical Solution")
+# plot!(noisydataset[1][:, 2], noisydataset[1][:, 1])
+
+# Neural network
+chain = Lux.Chain(Lux.Dense(2, 8, Lux.tanh),
+    Lux.Dense(8, 8, Lux.tanh),
+    Lux.Dense(8, 1))
+
+discretization = NeuralPDE.BayesianPINN([chain],
+    GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset, nothing])
+
+@named pde_system = PDESystem(eq,
+    bcs,
+    domains,
+    [x, t],
+    [u(x, t)],
+    [α],
+    defaults = Dict([α => 0.5]))
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 100,
+    bcstd = [0.2, 0.2, 0.2, 0.2, 0.2],
+    phystd = [1.0], l2std = [0.05], param = [Distributions.LogNormal(0.5, 2)],
+    priorsNNw = (0.0, 10.0),
+    saveats = [1 / 100.0, 1 / 100.0], progress = true)
+
+eqs = pde_system.eqs
+Dict_differentials = Dict()
+exps = toexpr.(eqs)
+nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+sol2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 100,
+    bcstd = [0.2, 0.2, 0.2, 0.2, 0.2],
+    phystd = [1.0], phystdnew = [0.05], l2std = [0.05],
+    param = [Distributions.LogNormal(0.5, 2)],
+    priorsNNw = (0.0, 10.0),
+    saveats = [1 / 100.0, 1 / 100.0], Dict_differentials = Dict_differentials,
+    progress = true)
+
+phi = discretization.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain) for (d, dx) in zip(domains, [dx / 10, dt])]
+u_predict = [[first(pmean(phi([x, t], sol1.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+# p1 = plot(xs, u_predict, title = "predict")
+# p2 = plot(xs, u_real, title = "analytic")
+# p3 = plot(xs, diff_u, title = "error")
+# plot(p1, p2, p3)
+
+phi = discretization.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain) for (d, dx) in zip(domains, [dx / 10, dt])]
+u_predict = [[first(pmean(phi([x, t], sol2.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol2.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+# p1 = plot(xs, u_predict, title = "predict")
+# p2 = plot(xs, u_real, title = "analytic")
+# p3 = plot(xs, diff_u, title = "error")
+# plot(p1, p2, p3)

From 8c350591dcef5c8a24522a9045fa8e3a70dc5f81 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Tue, 27 Feb 2024 02:44:21 +0530
Subject: [PATCH 081/107] refactor code, Corrected PDE_BPINN Logphys calc.

---
 src/PDE_BPINN.jl              |   3 +-
 src/discretize.jl             |  46 +++-
 src/training_strategies.jl    |  52 ++++-
 test/BPINN_PDEinvsol_tests.jl | 386 +++++++++++++++++++++-------------
 4 files changed, 324 insertions(+), 163 deletions(-)

diff --git a/src/PDE_BPINN.jl b/src/PDE_BPINN.jl
index e940c8c5e1..ddac125da6 100644
--- a/src/PDE_BPINN.jl
+++ b/src/PDE_BPINN.jl
@@ -276,7 +276,8 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
 
         # consider all dataset domain points and for each row new set of equation loss function
         # this is a vector of tuple{vector,nothing}
-        pde_loss_functions = [merge_strategy_with_loglikelihood_function(pinnrep::PINNRepresentation,
+        pde_loss_functions = [merge_strategy_with_loglikelihood_function(
+            pinnrep::PINNRepresentation,
             GridTraining(0.1),
             yuh1[i],
             nothing;
diff --git a/src/discretize.jl b/src/discretize.jl
index b49319742b..d90e04fb20 100644
--- a/src/discretize.jl
+++ b/src/discretize.jl
@@ -445,13 +445,14 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem, discretization::Ab
     # assume one single additional loss function if there is one. this means that the user needs to lump all their functions into a single one,
     num_additional_loss = convert(Int, additional_loss !== nothing)
 
-    adaloss_T = eltype(adaloss.pde_loss_weights)
+        adaloss_T = eltype(adaloss.pde_loss_weights)
 
-    # this will error if the user has provided a number of initial weights that is more than 1 and doesn't match the number of loss functions
-    adaloss.pde_loss_weights = ones(adaloss_T, num_pde_losses) .* adaloss.pde_loss_weights
-    adaloss.bc_loss_weights = ones(adaloss_T, num_bc_losses) .* adaloss.bc_loss_weights
-    adaloss.additional_loss_weights = ones(adaloss_T, num_additional_loss) .*
-                                      adaloss.additional_loss_weights
+        # this will error if the user has provided a number of initial weights that is more than 1 and doesn't match the number of loss functions
+        adaloss.pde_loss_weights = ones(adaloss_T, num_pde_losses) .*
+                                   adaloss.pde_loss_weights
+        adaloss.bc_loss_weights = ones(adaloss_T, num_bc_losses) .* adaloss.bc_loss_weights
+        adaloss.additional_loss_weights = ones(adaloss_T, num_additional_loss) .*
+                                          adaloss.additional_loss_weights
 
     reweight_losses_func = generate_adaptive_loss_function(pinnrep, adaloss,
         pde_loss_functions, bc_loss_functions)
@@ -520,10 +521,36 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem, discretization::Ab
             return full_weighted_loss
         end
 
-        return full_loss_function
+        return bc_loss_functions, pde_loss_functions, full_loss_function
     end
 
     function get_likelihood_estimate_function(discretization::BayesianPINN)
+        # Because seperate reweighting code section needed and loglikelihood is pointwise independant
+        pde_loss_functions, bc_loss_functions = merge_strategy_with_loglikelihood_function(
+            pinnrep,
+            strategy,
+            datafree_pde_loss_functions,
+            datafree_bc_loss_functions)
+
+        # setup for all adaptive losses
+        num_pde_losses = length(pde_loss_functions)
+        num_bc_losses = length(bc_loss_functions)
+        # assume one single additional loss function if there is one. this means that the user needs to lump all their functions into a single one,
+        num_additional_loss = additional_loss isa Nothing ? 0 : 1
+
+        adaloss_T = eltype(adaloss.pde_loss_weights)
+
+        # this will error if the user has provided a number of initial weights that is more than 1 and doesn't match the number of loss functions
+        adaloss.pde_loss_weights = ones(adaloss_T, num_pde_losses) .*
+                                   adaloss.pde_loss_weights
+        adaloss.bc_loss_weights = ones(adaloss_T, num_bc_losses) .* adaloss.bc_loss_weights
+        adaloss.additional_loss_weights = ones(adaloss_T, num_additional_loss) .*
+                                          adaloss.additional_loss_weights
+
+        reweight_losses_func = generate_adaptive_loss_function(pinnrep, adaloss,
+            pde_loss_functions,
+            bc_loss_functions)
+
         dataset_pde, dataset_bc = discretization.dataset
 
         # required as Physics loss also needed on the discrete dataset domain points
@@ -592,10 +619,11 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem, discretization::Ab
             return full_weighted_loglikelihood
         end
 
-        return full_loss_function
+        return bc_loss_functions, pde_loss_functions, full_loss_function
     end
 
-    full_loss_function = get_likelihood_estimate_function(discretization)
+    bc_loss_functions, pde_loss_functions, full_loss_function = get_likelihood_estimate_function(discretization)
+
     pinnrep.loss_functions = PINNLossFunctions(bc_loss_functions, pde_loss_functions,
         full_loss_function, additional_loss, datafree_pde_loss_functions,
         datafree_bc_loss_functions)
diff --git a/src/training_strategies.jl b/src/training_strategies.jl
index e008d131aa..a9240488df 100644
--- a/src/training_strategies.jl
+++ b/src/training_strategies.jl
@@ -14,6 +14,7 @@ corresponding to the grid spacing in each dimension.
     dx
 end
 
+# dataset must have depvar values for same values of indvars
 function get_dataset_train_points(eqs, train_sets, pinnrep)
     dict_depvar_input = pinnrep.dict_depvar_input
     depvars = pinnrep.depvars
@@ -23,11 +24,28 @@ function get_dataset_train_points(eqs, train_sets, pinnrep)
     symbols_input = [(i, dict_depvar_input[i]) for i in depvars]
     # [(:u, [:t])]
     eq_args = NeuralPDE.get_argument(eqs, dict_indvars, dict_depvars)
-    # [[:t]]
-
-    points = [vcat([train_sets[i][:, 2:end]'
-                    for i in eachindex(symbols_input) if symbols_input[i][2] == eq_arg]...)
-              for eq_arg in eq_args]
+    # equation wise indvar presence ~ [[:t]]
+    # in each equation atleast one depvars must be a function of all indvars(to cover heterogenous/not case)
+
+    # train_sets follows order of depvars
+    # take dataset indvar values if for equations depvar's indvar matches input symbol indvar
+    # points =  [vcat([train_sets[i][:, 2:end]'
+    #                 for i in eachindex(symbols_input) if symbols_input[i][2] == eq_arg]...)
+    #           for eq_arg in eq_args]
+
+    points = []
+    for eq_arg in eq_args
+        eq_points = []
+        for i in eachindex(symbols_input)
+            if symbols_input[i][2] == eq_arg
+                push!(eq_points, train_sets[i][:, 2:end]')
+                # Terminate to avoid repetitive ind var points inclusion
+                break
+            end
+        end
+        # Concatenate points for this equation argument
+        push!(points, vcat(eq_points...))
+    end
 
     return points
 end
@@ -40,17 +58,25 @@ function merge_strategy_with_loglikelihood_function(pinnrep::PINNRepresentation,
     adaptor = EltypeAdaptor{eltypeθ}()
         strategy::GridTraining,
         datafree_pde_loss_function,
-        datafree_bc_loss_function; train_sets_pde = nothing, train_sets_bc=nothing)
+        datafree_bc_loss_function; train_sets_pde = nothing, train_sets_bc = nothing)
     @unpack domains, eqs, bcs, dict_indvars, dict_depvars, flat_init_params = pinnrep
-
+    dx = strategy.dx
     eltypeθ = eltype(pinnrep.flat_init_params)
 
+    # physics loss merge_strategy_with_loglikelihood_function call case
+    if ((train_sets_bc isa Nothing)&&(train_sets_pde isa Nothing))
+        train_sets_pde, train_sets_bc = generate_training_sets(
+            domains, dx, eqs, bcs, eltypeθ,
+            dict_indvars, dict_depvars)
+    end
+
     # is vec as later each _set in pde_train_sets are columns as points transformed to vector of points (pde_train_sets must be rowwise)
     pde_loss_functions = if !(train_sets_pde isa Nothing)
+        # dataset and domain pde losses case
         pde_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)),
             train_sets_pde)
 
-        [get_loss_function(_loss, _set, eltypeθ, strategy)
+        [get_points_loss_functions(_loss, _set, eltypeθ, strategy)
                               for (_loss, _set) in zip(datafree_pde_loss_function,
             pde_train_sets)]
     else
@@ -58,10 +84,11 @@ function merge_strategy_with_loglikelihood_function(pinnrep::PINNRepresentation,
     end
     
     bc_loss_functions = if !(train_sets_bc isa Nothing)
+        # dataset and domain bc losses case
         bcs_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)),
             train_sets_bc)
 
-        [get_loss_function(_loss, _set, eltypeθ, strategy)
+        [get_points_loss_functions(_loss, _set, eltypeθ, strategy)
                          for (_loss, _set) in zip(datafree_bc_loss_function, bcs_train_sets)]
     else
         nothing
@@ -70,6 +97,13 @@ function merge_strategy_with_loglikelihood_function(pinnrep::PINNRepresentation,
     return pde_loss_functions, bc_loss_functions
 end
 
+function get_points_loss_functions(loss_function, train_set, eltypeθ, strategy::GridTraining;
+        τ = nothing)
+    function loss(θ, std)
+        logpdf(MvNormal(loss_function(train_set, θ)[1, :], LinearAlgebra.Diagonal(abs2.(std .* ones(length(train_set))))), zeros(length(train_set)))
+    end
+end
+
 function merge_strategy_with_loss_function(pinnrep::PINNRepresentation,
         strategy::GridTraining, datafree_pde_loss_function, datafree_bc_loss_function)
     (; domains, eqs, bcs, dict_indvars, dict_depvars) = pinnrep
diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index a285dcb443..13e95e3ca2 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -112,7 +112,7 @@ end
         Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
             Lux.Dense(n, 1)),
         Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-            Lux.Dense(n, 1)),
+            Lux.Dense(n, 1))
     ]
 
     #Generate Data
@@ -216,7 +216,7 @@ sol1 = ahmc_bayesian_pinn_pde(pde_system,
     priorsNNw = (0.0, 1.0),
     saveats = [1 / 50.0],
     param = [LogNormal(6.0, 0.5)],
-    Dict_differentials = Dict_differentials, progress = true)
+    Dict_differentials = Dict_differentials)
 
 param = 2 * π
 ts = vec(sol1.timepoints[1])
@@ -234,7 +234,7 @@ println("Example 3: Lotka Volterra with New parameter estimation")
 Dt = Differential(t)
 eqs = [Dt(x(t)) ~ α * x(t) - β * x(t) * y(t), Dt(y(t)) ~ -γ * y(t) + δ * x(t) * y(t)]
 bcs = [x(0) ~ 1.0, y(0) ~ 1.0]
-domains = [t ∈ Interval(0.0, 4.0)]
+domains = [t ∈ Interval(0.0, 6.0)]
 
 # Define the parameters' values
 # params = [α => 1.0, β => 0.5, γ => 0.5, δ => 1.0]
@@ -244,7 +244,7 @@ chainl = [
     Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),
         Lux.Dense(6, 1)),
     Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),
-        Lux.Dense(6, 1)),
+        Lux.Dense(6, 1))
 ]
 
 initl, st = Lux.setup(Random.default_rng(), chainl[1])
@@ -256,9 +256,9 @@ initl1, st1 = Lux.setup(Random.default_rng(), chainl[2])
     [t],
     [x(t), y(t)],
     [α, β, γ, δ],
-    defaults = Dict([α => 1.0, β => 0.5, γ => 0.5, δ => 1.0]))
+    defaults = Dict([α => 5, β => 0, γ => 0.5, δ => 2]))
 
-using NeuralPDE, Lux, Plots, OrdinaryDiffEq, Distributions, Random
+using NeuralPDE, Lux, OrdinaryDiffEq, Distributions, Random
 
 function lotka_volterra(u, p, t)
     α, β, γ, δ = p
@@ -281,10 +281,14 @@ solution = solve(prob, Tsit5(); saveat = dt)
 # Extract solution
 time = solution.t
 u = hcat(solution.u...)
+u1 = u .+ ((0.3 .* randn(size(u))) .* u)
+
+# using Plots, StatsPlots
+# plotly()
 # plot(time, u[1, :])
 # plot!(time, u[2, :])
 # Construct dataset
-dataset = [hcat(u[i, :], time) for i in 1:2]
+dataset = [hcat(u1[i, :], time) for i in 1:2]
 
 discretization = BayesianPINN(chainl, GridTraining(0.01), param_estim = true,
     dataset = [dataset, nothing])
@@ -295,6 +299,101 @@ Dict_differentials = Dict()
 exps = toexpr.(eqs)
 nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
 
+sol2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 500,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 3.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(0, 2),
+        Normal(0, 2),
+        Normal(0, 2),
+        Normal(0, 2)
+    ])
+
+# plot(sol2.timepoints[1]', sol2.ensemblesol[1])
+# plot!(sol2.timepoints[2]', sol2.ensemblesol[2])
+
+# plot(time, chainl[1](time', sol2.estimated_nn_params[1], st)[1])
+# plot!(time, chainl[2](time', sol2.estimated_nn_params[2], st)[1])
+
+# sol1 = ahmc_bayesian_pinn_pde(pde_system,
+#     discretization;
+#     draw_samples = 500,
+#     bcstd = [0.1, 0.1],
+#     phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+#     phystdnew = [1, 1],
+#     priorsNNw = (0.0, 3.0),
+#     saveats = [1 / 50.0],
+#     # Kernel = AdvancedHMC.NUTS(0.8),
+#     param = [
+#         Normal(1, 2),
+#         Normal(2, 2),
+#         Normal(2, 2),
+#         Normal(0, 2)
+#     ], Dict_differentials = Dict_differentials, progress = true)
+
+# # plot(time', chainl[1](time', sol1.estimated_nn_params[1], st)[1])
+# # plot!(time, chainl[2](time', sol1.estimated_nn_params[2], st)[1])
+
+# sol3 = ahmc_bayesian_pinn_pde(pde_system,
+#     discretization;
+#     draw_samples = 500,
+#     bcstd = [0.05, 0.05],
+#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
+#     phystdnew = [0.5, 0.5],
+#     #  Kernel = AdvancedHMC.NUTS(0.8),
+#     priorsNNw = (0.0, 10.0),
+#     saveats = [1 / 50.0],
+#     param = [
+#         Normal(0.0, 2),
+#         Normal(0.0, 2),
+#         Normal(0.0, 2),
+#         Normal(0.0, 2)
+#     ],
+#     Dict_differentials = Dict_differentials, progress = true)
+
+# sol = ahmc_bayesian_pinn_pde(pde_system,
+#     discretization;
+#     draw_samples = 500,
+#     bcstd = [0.05, 0.05],
+#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
+#     priorsNNw = (0.0, 10.0),
+#     saveats = [1 / 50.0],
+#     # Kernel = AdvancedHMC.NUTS(0.8),
+#     param = [
+#         Normal(1.0, 2),
+#         Normal(1.0, 2),
+#         Normal(1.0, 2),
+#         Normal(1.0, 2)
+#     ], progress = true)
+
+# plot!(sol.timepoints[1]', sol.ensemblesol[1])
+# plot!(sol.timepoints[2]', sol.ensemblesol[2])
+
+# sol1 = ahmc_bayesian_pinn_pde(pde_system,
+#     discretization;
+#     draw_samples = 500,
+#     bcstd = [0.05, 0.05],
+#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
+#     phystdnew = [0.5, 0.5],
+#     #  Kernel = AdvancedHMC.NUTS(0.8),
+#     priorsNNw = (0.0, 10.0),
+#     saveats = [1 / 50.0],
+#     param = [
+#         Normal(1.0, 2),
+#         Normal(1.0, 2),
+#         Normal(1.0, 2),
+#         Normal(1.0, 2)
+#     ],
+#     Dict_differentials = Dict_differentials, progress = true)
+
+# plot!(sol1.timepoints[1]', sol1.ensemblesol[1])
+# plot!(sol1.timepoints[2]', sol1.ensemblesol[2])
+
 sol = ahmc_bayesian_pinn_pde(pde_system,
     discretization;
     draw_samples = 500,
@@ -307,8 +406,8 @@ sol = ahmc_bayesian_pinn_pde(pde_system,
         Normal(1.0, 2),
         Normal(1.0, 2),
         Normal(1.0, 2),
-        Normal(1.0, 2),
-    ], progress = true)
+        Normal(1.0, 2)
+    ])
 
 # plot!(sol.timepoints[1]', sol.ensemblesol[1])
 # plot!(sol.timepoints[2]', sol.ensemblesol[2])
@@ -318,7 +417,7 @@ sol1 = ahmc_bayesian_pinn_pde(pde_system,
     draw_samples = 500,
     bcstd = [0.05, 0.05],
     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
-    phystdnew = [0.1, 0.1],
+    phystdnew = [0.5, 0.5],
     #  Kernel = AdvancedHMC.NUTS(0.8),
     priorsNNw = (0.0, 10.0),
     saveats = [1 / 50.0],
@@ -326,12 +425,9 @@ sol1 = ahmc_bayesian_pinn_pde(pde_system,
         Normal(1.0, 2),
         Normal(1.0, 2),
         Normal(1.0, 2),
-        Normal(1.0, 2),
+        Normal(1.0, 2)
     ],
-    Dict_differentials = Dict_differentials, progress = true)
-
-# plot!(sol1.timepoints[1]', sol1.ensemblesol[1])
-# plot!(sol1.timepoints[2]', sol1.ensemblesol[2])
+    Dict_differentials = Dict_differentials)
 
 param = 2 * π
 ts = vec(sol1.timepoints[1])
@@ -358,137 +454,139 @@ u_predict = pmean(sol1.ensemblesol[1])
 # end
 # println(points1 == points)
 
-using NeuralPDE, Flux, Lux, ModelingToolkit, LinearAlgebra, AdvancedHMC
-import ModelingToolkit: Interval, infimum, supremum, Distributions
-using Plots, MonteCarloMeasurements
-
-@parameters x, t, α
-@variables u(..)
-Dt = Differential(t)
-Dx = Differential(x)
-Dx2 = Differential(x)^2
-Dx3 = Differential(x)^3
-Dx4 = Differential(x)^4
-
-# α = 1
-β = 4
-γ = 1
-eq = Dt(u(x, t)) + u(x, t) * Dx(u(x, t)) + α * Dx2(u(x, t)) + β * Dx3(u(x, t)) + γ * Dx4(u(x, t)) ~ 0
-
-u_analytic(x, t; z = -x / 2 + t) = 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
-du(x, t; z = -x / 2 + t) = 15 / 2 * (tanh(z) + 1) * (3 * tanh(z) - 1) * sech(z)^2
-
-bcs = [u(x, 0) ~ u_analytic(x, 0),
-    u(-10, t) ~ u_analytic(-10, t),
-    u(10, t) ~ u_analytic(10, t),
-    Dx(u(-10, t)) ~ du(-10, t),
-    Dx(u(10, t)) ~ du(10, t)]
-
-# Space and time domains
-domains = [x ∈ Interval(-10.0, 10.0),
-    t ∈ Interval(0.0, 1.0)]
-
-# Discretization
-dx = 0.4;
-dt = 0.2;
-
-# Function to compute analytical solution at a specific point (x, t)
-function u_analytic_point(x, t)
-    z = -x / 2 + t
-    return 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
-end
-
-# Function to generate the dataset matrix
-function generate_dataset_matrix(domains, dx, dt)
-    x_values = -10:dx:10
-    t_values = 0.0:dt:1.0
-
-    dataset = []
-
-    for t in t_values
-        for x in x_values
-            u_value = u_analytic_point(x, t)
-            push!(dataset, [u_value, x, t])
-        end
-    end
-
-    return vcat([data' for data in dataset]...)
-end
-
-datasetpde = [generate_dataset_matrix(domains, dx, dt)]
-
-# noise to dataset
-noisydataset = deepcopy(datasetpde)
-noisydataset[1][:, 1] = noisydataset[1][:, 1] .+
-                        randn(size(noisydataset[1][:, 1])) .* 5 / 100 .*
-                        noisydataset[1][:, 1]
-
-# plot(datasetpde[1][:, 2], datasetpde[1][:, 1], title = "Dataset from Analytical Solution")
-# plot!(noisydataset[1][:, 2], noisydataset[1][:, 1])
-
-# Neural network
-chain = Lux.Chain(Lux.Dense(2, 8, Lux.tanh),
-    Lux.Dense(8, 8, Lux.tanh),
-    Lux.Dense(8, 1))
+# using NeuralPDE, Flux, Lux, ModelingToolkit, LinearAlgebra, AdvancedHMC
+# import ModelingToolkit: Interval, infimum, supremum, Distributions
+# using Plots, MonteCarloMeasurements
+
+# @parameters x, t, α
+# @variables u(..)
+# Dt = Differential(t)
+# Dx = Differential(x)
+# Dx2 = Differential(x)^2
+# Dx3 = Differential(x)^3
+# Dx4 = Differential(x)^4
+
+# # α = 1
+# β = 4
+# γ = 1
+# eq = Dt(u(x, t)) + u(x, t) * Dx(u(x, t)) + α * Dx2(u(x, t)) + β * Dx3(u(x, t)) + γ * Dx4(u(x, t)) ~ 0
+
+# u_analytic(x, t; z = -x / 2 + t) = 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
+# du(x, t; z = -x / 2 + t) = 15 / 2 * (tanh(z) + 1) * (3 * tanh(z) - 1) * sech(z)^2
+
+# bcs = [u(x, 0) ~ u_analytic(x, 0),
+#     u(-10, t) ~ u_analytic(-10, t),
+#     u(10, t) ~ u_analytic(10, t),
+#     Dx(u(-10, t)) ~ du(-10, t),
+#     Dx(u(10, t)) ~ du(10, t)]
+
+# # Space and time domains
+# domains = [x ∈ Interval(-10.0, 10.0),
+#     t ∈ Interval(0.0, 1.0)]
+
+# # Discretization
+# dx = 0.4;
+# dt = 0.2;
+
+# # Function to compute analytical solution at a specific point (x, t)
+# function u_analytic_point(x, t)
+#     z = -x / 2 + t
+#     return 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
+# end
 
-discretization = NeuralPDE.BayesianPINN([chain],
-    GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset, nothing])
+# # Function to generate the dataset matrix
+# function generate_dataset_matrix(domains, dx, dt)
+#     x_values = -10:dx:10
+#     t_values = 0.0:dt:1.0
 
-@named pde_system = PDESystem(eq,
-    bcs,
-    domains,
-    [x, t],
-    [u(x, t)],
-    [α],
-    defaults = Dict([α => 0.5]))
+#     dataset = []
 
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 100,
-    bcstd = [0.2, 0.2, 0.2, 0.2, 0.2],
-    phystd = [1.0], l2std = [0.05], param = [Distributions.LogNormal(0.5, 2)],
-    priorsNNw = (0.0, 10.0),
-    saveats = [1 / 100.0, 1 / 100.0], progress = true)
+#     for t in t_values
+#         for x in x_values
+#             u_value = u_analytic_point(x, t)
+#             push!(dataset, [u_value, x, t])
+#         end
+#     end
 
-eqs = pde_system.eqs
-Dict_differentials = Dict()
-exps = toexpr.(eqs)
-nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+#     return vcat([data' for data in dataset]...)
+# end
 
-sol2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 100,
-    bcstd = [0.2, 0.2, 0.2, 0.2, 0.2],
-    phystd = [1.0], phystdnew = [0.05], l2std = [0.05],
-    param = [Distributions.LogNormal(0.5, 2)],
-    priorsNNw = (0.0, 10.0),
-    saveats = [1 / 100.0, 1 / 100.0], Dict_differentials = Dict_differentials,
-    progress = true)
-
-phi = discretization.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain) for (d, dx) in zip(domains, [dx / 10, dt])]
-u_predict = [[first(pmean(phi([x, t], sol1.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-# p1 = plot(xs, u_predict, title = "predict")
-# p2 = plot(xs, u_real, title = "analytic")
-# p3 = plot(xs, diff_u, title = "error")
-# plot(p1, p2, p3)
-
-phi = discretization.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain) for (d, dx) in zip(domains, [dx / 10, dt])]
-u_predict = [[first(pmean(phi([x, t], sol2.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol2.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-# p1 = plot(xs, u_predict, title = "predict")
-# p2 = plot(xs, u_real, title = "analytic")
-# p3 = plot(xs, diff_u, title = "error")
-# plot(p1, p2, p3)
+# datasetpde = [generate_dataset_matrix(domains, dx, dt)]
+
+# # noise to dataset
+# noisydataset = deepcopy(datasetpde)
+# noisydataset[1][:, 1] = noisydataset[1][:, 1] .+
+#                         randn(size(noisydataset[1][:, 1])) .* 5 / 100 .*
+#                         noisydataset[1][:, 1]
+
+# # plot(datasetpde[1][:, 2], datasetpde[1][:, 1], title = "Dataset from Analytical Solution")
+# # plot!(noisydataset[1][:, 2], noisydataset[1][:, 1])
+
+# # Neural network
+# chain = Lux.Chain(Lux.Dense(2, 8, Lux.tanh),
+#     Lux.Dense(8, 8, Lux.tanh),
+#     Lux.Dense(8, 1))
+
+# discretization = NeuralPDE.BayesianPINN([chain],
+#     GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset, nothing])
+
+# @named pde_system = PDESystem(eq,
+#     bcs,
+#     domains,
+#     [x, t],
+#     [u(x, t)],
+#     [α],
+#     defaults = Dict([α => 0.5]))
+
+# sol1 = ahmc_bayesian_pinn_pde(pde_system,
+#     discretization;
+#     draw_samples = 100,
+#     bcstd = [0.2, 0.2, 0.2, 0.2, 0.2],
+#     phystd = [1.0], l2std = [0.05], param = [Distributions.LogNormal(0.5, 2)],
+#     priorsNNw = (0.0, 10.0),
+#     saveats = [1 / 100.0, 1 / 100.0], progress = true)
+
+# eqs = pde_system.eqs
+# Dict_differentials = Dict()
+# exps = toexpr.(eqs)
+# nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+# sol2 = ahmc_bayesian_pinn_pde(pde_system,
+#     discretization;
+#     draw_samples = 100,
+#     bcstd = [0.2, 0.2, 0.2, 0.2, 0.2],
+#     phystd = [1.0], phystdnew = [0.05], l2std = [0.05],
+#     param = [Distributions.LogNormal(0.5, 2)],
+#     priorsNNw = (0.0, 10.0),
+#     saveats = [1 / 100.0, 1 / 100.0], Dict_differentials = Dict_differentials,
+#     progress = true)
+
+# phi = discretization.phi[1]
+# xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+#           for (d, dx) in zip(domains, [dx / 10, dt])]
+# u_predict = [[first(pmean(phi([x, t], sol1.estimated_nn_params[1]))) for x in xs]
+#              for t in ts]
+# u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+# diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1.estimated_nn_params[1]))))
+#            for x in xs]
+#           for t in ts]
+
+# # p1 = plot(xs, u_predict, title = "predict")
+# # p2 = plot(xs, u_real, title = "analytic")
+# # p3 = plot(xs, diff_u, title = "error")
+# # plot(p1, p2, p3)
+
+# phi = discretization.phi[1]
+# xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+#           for (d, dx) in zip(domains, [dx / 10, dt])]
+# u_predict = [[first(pmean(phi([x, t], sol2.estimated_nn_params[1]))) for x in xs]
+#              for t in ts]
+# u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+# diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol2.estimated_nn_params[1]))))
+#            for x in xs]
+#           for t in ts]
+
+# # p1 = plot(xs, u_predict, title = "predict")
+# # p2 = plot(xs, u_real, title = "analytic")
+# # p3 = plot(xs, diff_u, title = "error")
+# # plot(p1, p2, p3)

From 04ce111c9a3b2d8a2be4c8fe9ed91164bc66164b Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Thu, 29 Feb 2024 00:12:00 +0530
Subject: [PATCH 082/107] corrected original and new implementation, comments

---
 src/PDE_BPINN.jl              |  42 ++--
 src/discretize.jl             |   3 +-
 src/training_strategies.jl    |  11 +-
 test/BPINN_PDE_tests.jl       |  35 ++-
 test/BPINN_PDEinvsol_tests.jl | 447 +---------------------------------
 5 files changed, 56 insertions(+), 482 deletions(-)

diff --git a/src/PDE_BPINN.jl b/src/PDE_BPINN.jl
index ddac125da6..a64eb759c7 100644
--- a/src/PDE_BPINN.jl
+++ b/src/PDE_BPINN.jl
@@ -270,37 +270,39 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
     newloss = if Dict_differentials isa Nothing
         nothing
     else
-        yuh1 = get_lossy(pinnrep, dataset_pde, Dict_differentials)
-        # eqs = pinnrep.bcs
-        # yuh2 = get_lossy(pinnrep, dataset_pde, eqs)
-
-        # consider all dataset domain points and for each row new set of equation loss function
-        # this is a vector of tuple{vector,nothing}
-        pde_loss_functions = [merge_strategy_with_loglikelihood_function(
-            pinnrep::PINNRepresentation,
+        datafree_colloc_loss_functions = get_lossy(pinnrep, dataset_pde, Dict_differentials)
+        # equals number of indvar coords in dataset
+        # add case for if parameters present in bcs?
+
+        train_sets_pde = get_dataset_train_points(pde_system.eqs,
+                dataset_pde,
+                pinnrep)
+        colloc_train_sets = [[hcat(train_sets_pde[i][:, j]...)' for i in eachindex(datafree_colloc_loss_functions[1])] for j in eachindex(datafree_colloc_loss_functions)]
+
+        # for each datafree_colloc_loss_function create loss_functions by passing dataset's indvar coords as train_sets_pde.
+        # placeholder strategy = GridTraining(0.1), datafree_bc_loss_function and train_sets_bc must be nothing
+        # order of indvar coords will be same as corresponding depvar coords values in dataset provided in get_lossy() call.
+        pde_loss_function_points = [merge_strategy_with_loglikelihood_function(
+            pinnrep,
             GridTraining(0.1),
-            yuh1[i],
+            datafree_colloc_loss_functions[i],
             nothing;
-            # pass transformation of each dataset row-corresponds to each point, for each depvar dataset point merged equation vector
-            train_sets_pde = get_dataset_train_points(pde_system.eqs,
-                [Array(data[i, :]') for data in dataset_pde],
-                pinnrep),
+            train_sets_pde = colloc_train_sets[i],
             train_sets_bc = nothing)
-                              for i in eachindex(yuh1)]
+                              for i in eachindex(datafree_colloc_loss_functions)]
 
         function L2_loss2(θ, allstd)
             stdpdesnew = allstd[4]
 
             # first vector of losses,from tuple -> pde losses, first[1] pde loss
-            pde_loglikelihoods = [[logpdf(Normal(0, stdpdesnew[j]), pde_loss_function(θ))
-                                   for (j, pde_loss_function) in enumerate(pde_loss_functions[i][1])]
-                                  for i in eachindex(pde_loss_functions)]
+            pde_loglikelihoods = [sum([pde_loss_function(θ, stdpdesnew[i])
+                                       for (i, pde_loss_function) in enumerate(pde_loss_functions[1])])
+                                  for pde_loss_functions in pde_loss_function_points]
 
-            # bc_loglikelihoods = [logpdf(Normal(0, stdbcs[j]), bc_loss_function(θ))
+            # bc_loglikelihoods = [sum([bc_loss_function(θ, stdpdesnew[i]) for (i, bc_loss_function) in enumerate(pde_loss_function_points[1])]) for pde_loss_function_points in pde_loss_functions]
             #                      for (j, bc_loss_function) in enumerate(bc_loss_functions)]
 
-            return sum(sum(pde_loglikelihoods))
-            # sum(sum(pde_loglikelihoods) + sum(bc_loglikelihoods))
+            return sum(pde_loglikelihoods)
         end
     end
 
diff --git a/src/discretize.jl b/src/discretize.jl
index d90e04fb20..757c1f8b8f 100644
--- a/src/discretize.jl
+++ b/src/discretize.jl
@@ -525,7 +525,7 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem, discretization::Ab
     end
 
     function get_likelihood_estimate_function(discretization::BayesianPINN)
-        # Because seperate reweighting code section needed and loglikelihood is pointwise independant
+        # Because separate reweighting code section needed and loglikelihood is pointwise independent
         pde_loss_functions, bc_loss_functions = merge_strategy_with_loglikelihood_function(
             pinnrep,
             strategy,
@@ -578,7 +578,6 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem, discretization::Ab
                 pde_loglikelihoods += [logpdf(Normal(0, stdpdes[j]), pde_loss_function(θ))
                                        for (j, pde_loss_function) in enumerate(datapde_loss_functions)]
             end
-
             if !(databc_loss_functions isa Nothing)
                 bc_loglikelihoods += [logpdf(Normal(0, stdbcs[j]), bc_loss_function(θ))
                                       for (j, bc_loss_function) in enumerate(databc_loss_functions)]
diff --git a/src/training_strategies.jl b/src/training_strategies.jl
index a9240488df..f7493af8a6 100644
--- a/src/training_strategies.jl
+++ b/src/training_strategies.jl
@@ -29,10 +29,6 @@ function get_dataset_train_points(eqs, train_sets, pinnrep)
 
     # train_sets follows order of depvars
     # take dataset indvar values if for equations depvar's indvar matches input symbol indvar
-    # points =  [vcat([train_sets[i][:, 2:end]'
-    #                 for i in eachindex(symbols_input) if symbols_input[i][2] == eq_arg]...)
-    #           for eq_arg in eq_args]
-
     points = []
     for eq_arg in eq_args
         eq_points = []
@@ -99,8 +95,13 @@ end
 
 function get_points_loss_functions(loss_function, train_set, eltypeθ, strategy::GridTraining;
         τ = nothing)
+        # loss_function length is number of all points loss is being evaluated upon
+        # train sets rows are for each indvar, cols are coordinates (row_1,row_2,..row_n) at which loss evaluated
     function loss(θ, std)
-        logpdf(MvNormal(loss_function(train_set, θ)[1, :], LinearAlgebra.Diagonal(abs2.(std .* ones(length(train_set))))), zeros(length(train_set)))
+        logpdf(
+            MvNormal(loss_function(train_set, θ)[1, :],
+                LinearAlgebra.Diagonal(abs2.(std .* ones(size(train_set)[2])))),
+            zeros(size(train_set)[2]))
     end
 end
 
diff --git a/test/BPINN_PDE_tests.jl b/test/BPINN_PDE_tests.jl
index 1388722ce4..2b6ec13923 100644
--- a/test/BPINN_PDE_tests.jl
+++ b/test/BPINN_PDE_tests.jl
@@ -6,6 +6,8 @@
 
     Random.seed!(100)
 
+@testset "Example 1: 1D Periodic System" begin
+    # Cos(pi*t) example
     @parameters t
     @variables u(..)
     Dt = Differential(t)
@@ -29,8 +31,8 @@
     u_real = [analytic_sol_func(0.0, t) for t in ts]
     u_predict = pmean(sol1.ensemblesol[1])
 
-    @test u_predict≈u_real atol=0.5
-    @test mean(u_predict .- u_real) < 0.1
+    @test u_predict≈u_real atol=0.05
+    @test mean(u_predict .- u_real) < 0.001
 end
 
 @testitem "BPINN PDE II: 1D ODE" tags=[:pdebpinn] begin
@@ -65,15 +67,19 @@ end
 
     @named pde_system = PDESystem(eq, bcs, domains, [θ], [u])
 
-    sol1 = ahmc_bayesian_pinn_pde(
-        pde_system, discretization; draw_samples = 500, bcstd = [0.1],
-        phystd = [0.05], priorsNNw = (0.0, 10.0), saveats = [1 / 100.0])
+    sol1 = ahmc_bayesian_pinn_pde(pde_system,
+        discretization;
+        draw_samples = 500,
+        bcstd = [0.1],
+        phystd = [0.05],
+        priorsNNw = (0.0, 10.0),
+        saveats = [1 / 100.0], progress=true)
 
     analytic_sol_func(t) = exp(-(t^2) / 2) / (1 + t + t^3) + t^2
     ts = sol1.timepoints[1]
     u_real = vec([analytic_sol_func(t) for t in ts])
     u_predict = pmean(sol1.ensemblesol[1])
-    @test u_predict≈u_real atol=0.8
+    @test u_predict≈u_real atol=0.5
 end
 
 @testitem "BPINN PDE III: 3rd Degree ODE" tags=[:pdebpinn] begin
@@ -170,16 +176,20 @@ end
 
     @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
 
-    sol = ahmc_bayesian_pinn_pde(pde_system, discretization; draw_samples = 200,
-        bcstd = [0.003, 0.003, 0.003, 0.003], phystd = [0.003],
-        priorsNNw = (0.0, 10.0), saveats = [1 / 100.0, 1 / 100.0])
+    sol1 = ahmc_bayesian_pinn_pde(pde_system,
+        discretization;
+        draw_samples = 200,
+        bcstd = [0.01, 0.01, 0.01, 0.01],
+        phystd = [0.005],
+        priorsNNw = (0.0, 2.0),
+        saveats = [1 / 100.0, 1 / 100.0])
 
     xs = sol.timepoints[1]
     analytic_sol_func(x, y) = (sinpi(x) * sinpi(y)) / (2pi^2)
 
     u_predict = pmean(sol.ensemblesol[1])
     u_real = [analytic_sol_func(xs[:, i][1], xs[:, i][2]) for i in 1:length(xs[1, :])]
-    @test u_predict≈u_real rtol=0.5
+    @test u_predict≈u_real atol=0.8
 end
 
 @testitem "BPINN PDE: Translating from Flux" tags=[:pdebpinn] begin
@@ -219,7 +229,6 @@ end
     analytic_sol_func(t) = exp(-(t^2) / 2) / (1 + t + t^3) + t^2
     ts = sol.timepoints[1]
     u_real = vec([analytic_sol_func(t) for t in ts])
-    u_predict = pmean(sol.ensemblesol[1])
-
-    @test u_predict≈u_real atol=0.8
+    u_predict = pmean(sol1.ensemblesol[1])
+    @test u_predict≈u_real atol=0.1
 end
diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index 13e95e3ca2..8b1240f6ad 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -7,7 +7,7 @@ using ComponentArrays, ModelingToolkit
 
 Random.seed!(100)
 
-@testset "Example 1: 2D Periodic System with parameter estimation" begin
+@testset "Example 1: 1D Periodic System with parameter estimation" begin
     # Cos(pi*t) periodic curve
     @parameters t, p
     @variables u(..)
@@ -88,9 +88,9 @@ Random.seed!(100)
     u_real = [analytic_sol_func1(0.0, t) for t in ts]
     u_predict = pmean(sol1.ensemblesol[1])
 
-    @test u_predict≈u_real atol=1.5
-    @test mean(u_predict .- u_real) < 0.1
-    @test sol1.estimated_de_params[1]≈param atol=param * 0.3
+    @test u_predict≈u_real atol=0.1
+    @test mean(u_predict .- u_real) < 0.01
+    @test sol1.estimated_de_params[1]≈param atol=0.1
 end
 
 @testset "Example 2: Lorenz System with parameter estimation" begin
@@ -152,441 +152,4 @@ end
     p_ = sol1.estimated_de_params[1]
     @test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
     # @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
-end
-
-function recur_expression(exp, Dict_differentials)
-    for in_exp in exp.args
-        if !(in_exp isa Expr)
-            # skip +,== symbols, characters etc
-            continue
-
-        elseif in_exp.args[1] isa ModelingToolkit.Differential
-            # first symbol of differential term
-            # Dict_differentials for masking differential terms
-            # and resubstituting differentials in equations after putting in interpolations
-            # temp = in_exp.args[end]
-            Dict_differentials[eval(in_exp)] = Symbolics.variable("diff_$(length(Dict_differentials) + 1)")
-            return
-        else
-            recur_expression(in_exp, Dict_differentials)
-        end
-    end
-end
-
-println("Example 3: 2D Periodic System with New parameter estimation")
-@parameters t, p
-@variables u(..)
-
-Dt = Differential(t)
-eqs = Dt(u(t)) - cos(p * t) ~ 0
-bcs = [u(0) ~ 0.0]
-domains = [t ∈ Interval(0.0, 2.0)]
-
-chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
-initl, st = Lux.setup(Random.default_rng(), chainl)
-
-@named pde_system = PDESystem(eqs,
-    bcs,
-    domains,
-    [t],
-    [u(t)],
-    [p],
-    defaults = Dict([p => 4.0]))
-
-analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
-timepoints = collect(0.0:(1 / 100.0):2.0)
-u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
-u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
-dataset = [hcat(u1, timepoints)]
-
-discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
-    dataset = [dataset, nothing])
-
-# creating dictionary for masking equations
-eqs = pde_system.eqs
-Dict_differentials = Dict()
-exps = toexpr.(eqs)
-nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 1500,
-    bcstd = [0.05],
-    phystd = [0.01], l2std = [0.01],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 50.0],
-    param = [LogNormal(6.0, 0.5)],
-    Dict_differentials = Dict_differentials)
-
-param = 2 * π
-ts = vec(sol1.timepoints[1])
-u_real = [analytic_sol_func1(0.0, t) for t in ts]
-u_predict = pmean(sol1.ensemblesol[1])
-
-@test u_predict≈u_real atol=1.5
-@test mean(u_predict .- u_real) < 0.1
-@test sol1.estimated_de_params[1]≈param atol=param * 0.3
-
-println("Example 3: Lotka Volterra with New parameter estimation")
-@parameters t α β γ δ
-@variables x(..) y(..)
-
-Dt = Differential(t)
-eqs = [Dt(x(t)) ~ α * x(t) - β * x(t) * y(t), Dt(y(t)) ~ -γ * y(t) + δ * x(t) * y(t)]
-bcs = [x(0) ~ 1.0, y(0) ~ 1.0]
-domains = [t ∈ Interval(0.0, 6.0)]
-
-# Define the parameters' values
-# params = [α => 1.0, β => 0.5, γ => 0.5, δ => 1.0]
-# p = [1.5, 1.0, 3.0, 1.0]
-
-chainl = [
-    Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),
-        Lux.Dense(6, 1)),
-    Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),
-        Lux.Dense(6, 1))
-]
-
-initl, st = Lux.setup(Random.default_rng(), chainl[1])
-initl1, st1 = Lux.setup(Random.default_rng(), chainl[2])
-
-@named pde_system = PDESystem(eqs,
-    bcs,
-    domains,
-    [t],
-    [x(t), y(t)],
-    [α, β, γ, δ],
-    defaults = Dict([α => 5, β => 0, γ => 0.5, δ => 2]))
-
-using NeuralPDE, Lux, OrdinaryDiffEq, Distributions, Random
-
-function lotka_volterra(u, p, t)
-    α, β, γ, δ = p
-    x, y = u
-    dx = (α - β * y) * x
-    dy = (δ * x - γ) * y
-    return [dx, dy]
-end
-
-# initial-value problem.
-u0 = [1.0, 1.0]
-p = [1.5, 1.0, 3.0, 1.0]
-tspan = (0.0, 4.0)
-prob = ODEProblem(lotka_volterra, u0, tspan, p)
-
-# Solve using OrdinaryDiffEq.jl solver
-dt = 0.05
-solution = solve(prob, Tsit5(); saveat = dt)
-
-# Extract solution
-time = solution.t
-u = hcat(solution.u...)
-u1 = u .+ ((0.3 .* randn(size(u))) .* u)
-
-# using Plots, StatsPlots
-# plotly()
-# plot(time, u[1, :])
-# plot!(time, u[2, :])
-# Construct dataset
-dataset = [hcat(u1[i, :], time) for i in 1:2]
-
-discretization = BayesianPINN(chainl, GridTraining(0.01), param_estim = true,
-    dataset = [dataset, nothing])
-
-# creating dictionary for masking equations
-eqs = pde_system.eqs
-Dict_differentials = Dict()
-exps = toexpr.(eqs)
-nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-sol2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 500,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 3.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(0, 2),
-        Normal(0, 2),
-        Normal(0, 2),
-        Normal(0, 2)
-    ])
-
-# plot(sol2.timepoints[1]', sol2.ensemblesol[1])
-# plot!(sol2.timepoints[2]', sol2.ensemblesol[2])
-
-# plot(time, chainl[1](time', sol2.estimated_nn_params[1], st)[1])
-# plot!(time, chainl[2](time', sol2.estimated_nn_params[2], st)[1])
-
-# sol1 = ahmc_bayesian_pinn_pde(pde_system,
-#     discretization;
-#     draw_samples = 500,
-#     bcstd = [0.1, 0.1],
-#     phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-#     phystdnew = [1, 1],
-#     priorsNNw = (0.0, 3.0),
-#     saveats = [1 / 50.0],
-#     # Kernel = AdvancedHMC.NUTS(0.8),
-#     param = [
-#         Normal(1, 2),
-#         Normal(2, 2),
-#         Normal(2, 2),
-#         Normal(0, 2)
-#     ], Dict_differentials = Dict_differentials, progress = true)
-
-# # plot(time', chainl[1](time', sol1.estimated_nn_params[1], st)[1])
-# # plot!(time, chainl[2](time', sol1.estimated_nn_params[2], st)[1])
-
-# sol3 = ahmc_bayesian_pinn_pde(pde_system,
-#     discretization;
-#     draw_samples = 500,
-#     bcstd = [0.05, 0.05],
-#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
-#     phystdnew = [0.5, 0.5],
-#     #  Kernel = AdvancedHMC.NUTS(0.8),
-#     priorsNNw = (0.0, 10.0),
-#     saveats = [1 / 50.0],
-#     param = [
-#         Normal(0.0, 2),
-#         Normal(0.0, 2),
-#         Normal(0.0, 2),
-#         Normal(0.0, 2)
-#     ],
-#     Dict_differentials = Dict_differentials, progress = true)
-
-# sol = ahmc_bayesian_pinn_pde(pde_system,
-#     discretization;
-#     draw_samples = 500,
-#     bcstd = [0.05, 0.05],
-#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
-#     priorsNNw = (0.0, 10.0),
-#     saveats = [1 / 50.0],
-#     # Kernel = AdvancedHMC.NUTS(0.8),
-#     param = [
-#         Normal(1.0, 2),
-#         Normal(1.0, 2),
-#         Normal(1.0, 2),
-#         Normal(1.0, 2)
-#     ], progress = true)
-
-# plot!(sol.timepoints[1]', sol.ensemblesol[1])
-# plot!(sol.timepoints[2]', sol.ensemblesol[2])
-
-# sol1 = ahmc_bayesian_pinn_pde(pde_system,
-#     discretization;
-#     draw_samples = 500,
-#     bcstd = [0.05, 0.05],
-#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
-#     phystdnew = [0.5, 0.5],
-#     #  Kernel = AdvancedHMC.NUTS(0.8),
-#     priorsNNw = (0.0, 10.0),
-#     saveats = [1 / 50.0],
-#     param = [
-#         Normal(1.0, 2),
-#         Normal(1.0, 2),
-#         Normal(1.0, 2),
-#         Normal(1.0, 2)
-#     ],
-#     Dict_differentials = Dict_differentials, progress = true)
-
-# plot!(sol1.timepoints[1]', sol1.ensemblesol[1])
-# plot!(sol1.timepoints[2]', sol1.ensemblesol[2])
-
-sol = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 500,
-    bcstd = [0.05, 0.05],
-    phystd = [0.005, 0.005], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 10.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(1.0, 2),
-        Normal(1.0, 2),
-        Normal(1.0, 2),
-        Normal(1.0, 2)
-    ])
-
-# plot!(sol.timepoints[1]', sol.ensemblesol[1])
-# plot!(sol.timepoints[2]', sol.ensemblesol[2])
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 500,
-    bcstd = [0.05, 0.05],
-    phystd = [0.005, 0.005], l2std = [0.1, 0.1],
-    phystdnew = [0.5, 0.5],
-    #  Kernel = AdvancedHMC.NUTS(0.8),
-    priorsNNw = (0.0, 10.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1.0, 2),
-        Normal(1.0, 2),
-        Normal(1.0, 2),
-        Normal(1.0, 2)
-    ],
-    Dict_differentials = Dict_differentials)
-
-param = 2 * π
-ts = vec(sol1.timepoints[1])
-u_real = [analytic_sol_func1(0.0, t) for t in ts]
-u_predict = pmean(sol1.ensemblesol[1])
-
-@test u_predict≈u_real atol=1.5
-@test mean(u_predict .- u_real) < 0.1
-@test sol1.estimated_de_params[1]≈param atol=param * 0.3
-
-# points1 = []
-# for eq_arg in eq_args
-#     a = []
-#     # for each (depvar,[indvar1..]) if indvari==indvar (eq_arg)
-#     for i in eachindex(symbols_input)
-#         if symbols_input[i][2] == eq_arg
-#             # include domain points of that depvar
-#             # each loss equation take domain matrix [points..;points..]
-#             push!(a, train_sets[i][:, 2:end]')
-#         end
-#     end
-#     # vcat as new row for next equation
-#     push!(points1, vcat(a...))
-# end
-# println(points1 == points)
-
-# using NeuralPDE, Flux, Lux, ModelingToolkit, LinearAlgebra, AdvancedHMC
-# import ModelingToolkit: Interval, infimum, supremum, Distributions
-# using Plots, MonteCarloMeasurements
-
-# @parameters x, t, α
-# @variables u(..)
-# Dt = Differential(t)
-# Dx = Differential(x)
-# Dx2 = Differential(x)^2
-# Dx3 = Differential(x)^3
-# Dx4 = Differential(x)^4
-
-# # α = 1
-# β = 4
-# γ = 1
-# eq = Dt(u(x, t)) + u(x, t) * Dx(u(x, t)) + α * Dx2(u(x, t)) + β * Dx3(u(x, t)) + γ * Dx4(u(x, t)) ~ 0
-
-# u_analytic(x, t; z = -x / 2 + t) = 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
-# du(x, t; z = -x / 2 + t) = 15 / 2 * (tanh(z) + 1) * (3 * tanh(z) - 1) * sech(z)^2
-
-# bcs = [u(x, 0) ~ u_analytic(x, 0),
-#     u(-10, t) ~ u_analytic(-10, t),
-#     u(10, t) ~ u_analytic(10, t),
-#     Dx(u(-10, t)) ~ du(-10, t),
-#     Dx(u(10, t)) ~ du(10, t)]
-
-# # Space and time domains
-# domains = [x ∈ Interval(-10.0, 10.0),
-#     t ∈ Interval(0.0, 1.0)]
-
-# # Discretization
-# dx = 0.4;
-# dt = 0.2;
-
-# # Function to compute analytical solution at a specific point (x, t)
-# function u_analytic_point(x, t)
-#     z = -x / 2 + t
-#     return 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
-# end
-
-# # Function to generate the dataset matrix
-# function generate_dataset_matrix(domains, dx, dt)
-#     x_values = -10:dx:10
-#     t_values = 0.0:dt:1.0
-
-#     dataset = []
-
-#     for t in t_values
-#         for x in x_values
-#             u_value = u_analytic_point(x, t)
-#             push!(dataset, [u_value, x, t])
-#         end
-#     end
-
-#     return vcat([data' for data in dataset]...)
-# end
-
-# datasetpde = [generate_dataset_matrix(domains, dx, dt)]
-
-# # noise to dataset
-# noisydataset = deepcopy(datasetpde)
-# noisydataset[1][:, 1] = noisydataset[1][:, 1] .+
-#                         randn(size(noisydataset[1][:, 1])) .* 5 / 100 .*
-#                         noisydataset[1][:, 1]
-
-# # plot(datasetpde[1][:, 2], datasetpde[1][:, 1], title = "Dataset from Analytical Solution")
-# # plot!(noisydataset[1][:, 2], noisydataset[1][:, 1])
-
-# # Neural network
-# chain = Lux.Chain(Lux.Dense(2, 8, Lux.tanh),
-#     Lux.Dense(8, 8, Lux.tanh),
-#     Lux.Dense(8, 1))
-
-# discretization = NeuralPDE.BayesianPINN([chain],
-#     GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset, nothing])
-
-# @named pde_system = PDESystem(eq,
-#     bcs,
-#     domains,
-#     [x, t],
-#     [u(x, t)],
-#     [α],
-#     defaults = Dict([α => 0.5]))
-
-# sol1 = ahmc_bayesian_pinn_pde(pde_system,
-#     discretization;
-#     draw_samples = 100,
-#     bcstd = [0.2, 0.2, 0.2, 0.2, 0.2],
-#     phystd = [1.0], l2std = [0.05], param = [Distributions.LogNormal(0.5, 2)],
-#     priorsNNw = (0.0, 10.0),
-#     saveats = [1 / 100.0, 1 / 100.0], progress = true)
-
-# eqs = pde_system.eqs
-# Dict_differentials = Dict()
-# exps = toexpr.(eqs)
-# nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-# sol2 = ahmc_bayesian_pinn_pde(pde_system,
-#     discretization;
-#     draw_samples = 100,
-#     bcstd = [0.2, 0.2, 0.2, 0.2, 0.2],
-#     phystd = [1.0], phystdnew = [0.05], l2std = [0.05],
-#     param = [Distributions.LogNormal(0.5, 2)],
-#     priorsNNw = (0.0, 10.0),
-#     saveats = [1 / 100.0, 1 / 100.0], Dict_differentials = Dict_differentials,
-#     progress = true)
-
-# phi = discretization.phi[1]
-# xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-#           for (d, dx) in zip(domains, [dx / 10, dt])]
-# u_predict = [[first(pmean(phi([x, t], sol1.estimated_nn_params[1]))) for x in xs]
-#              for t in ts]
-# u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-# diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1.estimated_nn_params[1]))))
-#            for x in xs]
-#           for t in ts]
-
-# # p1 = plot(xs, u_predict, title = "predict")
-# # p2 = plot(xs, u_real, title = "analytic")
-# # p3 = plot(xs, diff_u, title = "error")
-# # plot(p1, p2, p3)
-
-# phi = discretization.phi[1]
-# xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-#           for (d, dx) in zip(domains, [dx / 10, dt])]
-# u_predict = [[first(pmean(phi([x, t], sol2.estimated_nn_params[1]))) for x in xs]
-#              for t in ts]
-# u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-# diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol2.estimated_nn_params[1]))))
-#            for x in xs]
-#           for t in ts]
-
-# # p1 = plot(xs, u_predict, title = "predict")
-# # p2 = plot(xs, u_real, title = "analytic")
-# # p3 = plot(xs, diff_u, title = "error")
-# # plot(p1, p2, p3)
+end
\ No newline at end of file

From e2ebfd0450bd5f00ff0c905a5c59ba9682136ecd Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Thu, 29 Feb 2024 00:44:59 +0530
Subject: [PATCH 083/107] update BPINN_ode, BPINN_PDE_tests

---
 test/BPINN_PDE_tests.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/BPINN_PDE_tests.jl b/test/BPINN_PDE_tests.jl
index 2b6ec13923..9224f9908a 100644
--- a/test/BPINN_PDE_tests.jl
+++ b/test/BPINN_PDE_tests.jl
@@ -73,7 +73,7 @@ end
         bcstd = [0.1],
         phystd = [0.05],
         priorsNNw = (0.0, 10.0),
-        saveats = [1 / 100.0], progress=true)
+        saveats = [1 / 100.0])
 
     analytic_sol_func(t) = exp(-(t^2) / 2) / (1 + t + t^3) + t^2
     ts = sol1.timepoints[1]

From d48e67cb53e91743ba51190acfea0d97c77f6ae5 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Thu, 29 Feb 2024 02:33:41 +0530
Subject: [PATCH 084/107] update BPINN_PDE_tests.jl

---
 test/BPINN_PDE_tests.jl | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/test/BPINN_PDE_tests.jl b/test/BPINN_PDE_tests.jl
index 9224f9908a..7217647a2f 100644
--- a/test/BPINN_PDE_tests.jl
+++ b/test/BPINN_PDE_tests.jl
@@ -31,7 +31,7 @@
     u_real = [analytic_sol_func(0.0, t) for t in ts]
     u_predict = pmean(sol1.ensemblesol[1])
 
-    @test u_predict≈u_real atol=0.05
+    @test u_predict≈u_real atol=0.08
     @test mean(u_predict .- u_real) < 0.001
 end
 
@@ -223,12 +223,17 @@ end
 
     @named pde_system = PDESystem(eq, bcs, domains, [θ], [u])
 
-    sol = ahmc_bayesian_pinn_pde(pde_system, discretization; draw_samples = 500,
-        bcstd = [0.1], phystd = [0.05], priorsNNw = (0.0, 10.0), saveats = [1 / 100.0])
+    sol1 = ahmc_bayesian_pinn_pde(pde_system,
+        discretization;
+        draw_samples = 500,
+        bcstd = [0.1],
+        phystd = [0.05],
+        priorsNNw = (0.0, 10.0),
+        saveats = [1 / 100.0],progress=true)
 
     analytic_sol_func(t) = exp(-(t^2) / 2) / (1 + t + t^3) + t^2
     ts = sol.timepoints[1]
     u_real = vec([analytic_sol_func(t) for t in ts])
     u_predict = pmean(sol1.ensemblesol[1])
-    @test u_predict≈u_real atol=0.1
+    @test u_predict≈u_real atol=0.5
 end

From e57aa5afe08b6bd21b667a4c2be528020fc263a9 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Thu, 29 Feb 2024 10:33:18 +0530
Subject: [PATCH 085/107] update BPINN_PDE_tests.jl

---
 test/BPINN_PDE_tests.jl | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/test/BPINN_PDE_tests.jl b/test/BPINN_PDE_tests.jl
index 7217647a2f..e0a8ea3a3f 100644
--- a/test/BPINN_PDE_tests.jl
+++ b/test/BPINN_PDE_tests.jl
@@ -22,17 +22,21 @@
     # non adaptive case
     discretization = BayesianPINN([chainl], GridTraining([0.01]))
 
-    sol1 = ahmc_bayesian_pinn_pde(
-        pde_system, discretization; draw_samples = 1500, bcstd = [0.02],
-        phystd = [0.01], priorsNNw = (0.0, 1.0), saveats = [1 / 50.0])
+    sol1 = ahmc_bayesian_pinn_pde(pde_system,
+        discretization;
+        draw_samples = 2000,
+        bcstd = [0.02],
+        phystd = [0.01],
+        priorsNNw = (0.0, 10.0),
+        saveats = [1 / 50.0])
 
     analytic_sol_func(u0, t) = u0 + sinpi(2t) / (2pi)
     ts = vec(sol1.timepoints[1])
     u_real = [analytic_sol_func(0.0, t) for t in ts]
     u_predict = pmean(sol1.ensemblesol[1])
 
-    @test u_predict≈u_real atol=0.08
-    @test mean(u_predict .- u_real) < 0.001
+    @test u_predict≈u_real atol=0.05
+    @test mean(u_predict .- u_real) < 1e-5
 end
 
 @testitem "BPINN PDE II: 1D ODE" tags=[:pdebpinn] begin
@@ -229,7 +233,7 @@ end
         bcstd = [0.1],
         phystd = [0.05],
         priorsNNw = (0.0, 10.0),
-        saveats = [1 / 100.0],progress=true)
+        saveats = [1 / 100.0])
 
     analytic_sol_func(t) = exp(-(t^2) / 2) / (1 + t + t^3) + t^2
     ts = sol.timepoints[1]

From 35ddbed0176b9349c1ab35b964c9a6b7285fa3d1 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Tue, 26 Mar 2024 19:54:59 +0530
Subject: [PATCH 086/107] done for now

---
 src/PDE_BPINN.jl              |  4 +-
 test/BPINN_PDE_tests.jl       |  2 +-
 test/BPINN_PDEinvsol_tests.jl | 94 ++++++++++++++++++++++++++++++++++-
 test/bpinnexperimental.jl     | 82 +++++++++++++++++++-----------
 4 files changed, 148 insertions(+), 34 deletions(-)

diff --git a/src/PDE_BPINN.jl b/src/PDE_BPINN.jl
index a64eb759c7..d0e36dd04f 100644
--- a/src/PDE_BPINN.jl
+++ b/src/PDE_BPINN.jl
@@ -288,7 +288,7 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
             datafree_colloc_loss_functions[i],
             nothing;
             train_sets_pde = colloc_train_sets[i],
-            train_sets_bc = nothing)
+            train_sets_bc = nothing)[1]
                               for i in eachindex(datafree_colloc_loss_functions)]
 
         function L2_loss2(θ, allstd)
@@ -296,7 +296,7 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
 
             # first vector of losses,from tuple -> pde losses, first[1] pde loss
             pde_loglikelihoods = [sum([pde_loss_function(θ, stdpdesnew[i])
-                                       for (i, pde_loss_function) in enumerate(pde_loss_functions[1])])
+                                       for (i, pde_loss_function) in enumerate(pde_loss_functions)])
                                   for pde_loss_functions in pde_loss_function_points]
 
             # bc_loglikelihoods = [sum([bc_loss_function(θ, stdpdesnew[i]) for (i, bc_loss_function) in enumerate(pde_loss_function_points[1])]) for pde_loss_function_points in pde_loss_functions]
diff --git a/test/BPINN_PDE_tests.jl b/test/BPINN_PDE_tests.jl
index e0a8ea3a3f..e543baea27 100644
--- a/test/BPINN_PDE_tests.jl
+++ b/test/BPINN_PDE_tests.jl
@@ -36,7 +36,7 @@
     u_predict = pmean(sol1.ensemblesol[1])
 
     @test u_predict≈u_real atol=0.05
-    @test mean(u_predict .- u_real) < 1e-5
+    @test mean(u_predict .- u_real) < 1e-3
 end
 
 @testitem "BPINN PDE II: 1D ODE" tags=[:pdebpinn] begin
diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index 8b1240f6ad..631d54e015 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -152,4 +152,96 @@ end
     p_ = sol1.estimated_de_params[1]
     @test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
     # @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
-end
\ No newline at end of file
+end
+
+function recur_expression(exp, Dict_differentials)
+    for in_exp in exp.args
+        if !(in_exp isa Expr)
+            # skip +,== symbols, characters etc
+            continue
+
+        elseif in_exp.args[1] isa ModelingToolkit.Differential
+            # first symbol of differential term
+            # Dict_differentials for masking differential terms
+            # and resubstituting differentials in equations after putting in interpolations
+            # temp = in_exp.args[end]
+            Dict_differentials[eval(in_exp)] = Symbolics.variable("diff_$(length(Dict_differentials) + 1)")
+            return
+        else
+            recur_expression(in_exp, Dict_differentials)
+        end
+    end
+end
+
+println("Example 3: 2D Periodic System with New parameter estimation")
+@parameters t, p
+@variables u(..)
+
+Dt = Differential(t)
+eqs = Dt(u(t)) - cos(p * t) * u(t) ~ 0
+bcs = [u(0) ~ 0.0]
+domains = [t ∈ Interval(0.0, 2.0)]
+
+chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
+initl, st = Lux.setup(Random.default_rng(), chainl)
+
+@named pde_system = PDESystem(eqs,
+    bcs,
+    domains,
+    [t],
+    [u(t)],
+    [p],
+    defaults = Dict([p => 4.0]))
+
+analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
+timepoints = collect(0.0:(1 / 100.0):2.0)
+u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
+u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
+dataset = [hcat(u1, timepoints)]
+
+discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
+    dataset = [dataset, nothing])
+
+# creating dictionary for masking equations
+eqs = pde_system.eqs
+Dict_differentials = Dict()
+exps = toexpr.(eqs)
+nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 1500,
+    bcstd = [0.05],
+    phystd = [0.01], l2std = [0.01], phystdnew = [0.05],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 50.0],
+    param = [LogNormal(6.0, 0.5)],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+sol2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 1500,
+    bcstd = [0.05],
+    phystd = [0.01], l2std = [0.01],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 50.0],
+    param = [LogNormal(6.0, 0.5)],
+    progress = true)
+
+param = 2 * π
+ts = vec(sol1.timepoints[1])
+u_real = [analytic_sol_func1(0.0, t) for t in ts]
+u_predict = pmean(sol1.ensemblesol[1])
+
+@test u_predict≈u_real atol=1.5
+@test mean(u_predict .- u_real) < 0.1
+@test sol1.estimated_de_params[1]≈param atol=param * 0.3
+
+ts = vec(sol2.timepoints[1])
+u_real = [analytic_sol_func1(0.0, t) for t in ts]
+u_predict = pmean(sol2.ensemblesol[1])
+
+@test u_predict≈u_real atol=1.5
+@test mean(u_predict .- u_real) < 0.1
+@test sol1.estimated_de_params[1]≈param atol=param * 0.3
\ No newline at end of file
diff --git a/test/bpinnexperimental.jl b/test/bpinnexperimental.jl
index 3de049bf58..a8a389ad44 100644
--- a/test/bpinnexperimental.jl
+++ b/test/bpinnexperimental.jl
@@ -44,20 +44,32 @@ plot!(solution, labels = ["x" "y"])
 chain = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),
     Lux.Dense(6, 2))
 
-alg = BNNODE(chain;
-dataset = dataset,
-draw_samples = 1000,
-l2std = [0.1, 0.1],
-phystd = [0.1, 0.1],
-priorsNNw = (0.0, 3.0),
-param = [
-    Normal(1, 2),
-    Normal(2, 2),
-    Normal(2, 2),
-    Normal(0, 2)], progress = true)
-
-@time sol_pestim1 = solve(prob, alg; saveat = dt,)
-@time sol_pestim2 = solve(prob, alg; estim_collocate = true, saveat = dt)
+alg1 = BNNODE(chain;
+    dataset = dataset,
+    draw_samples = 1000,
+    l2std = [0.1, 0.1],
+    phystd = [0.1, 0.1],
+    priorsNNw = (0.0, 3.0),
+    param = [
+        Normal(1, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(0, 2)], progress = true)
+
+alg2 = BNNODE(chain;
+    dataset = dataset,
+    draw_samples = 1000,
+    l2std = [0.1, 0.1],
+    phystd = [0.1, 0.1],
+    priorsNNw = (0.0, 3.0),
+    param = [
+        Normal(1, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(0, 2)], estim_collocate = true, progress = true)
+
+@time sol_pestim1 = solve(prob, alg1; saveat = dt)
+@time sol_pestim2 = solve(prob, alg2; saveat = dt)
 plot(times, sol_pestim1.ensemblesol[1], label = "estimated x1")
 plot!(times, sol_pestim2.ensemblesol[1], label = "estimated x2")
 plot!(times, sol_pestim1.ensemblesol[2], label = "estimated y1")
@@ -66,28 +78,29 @@ plot!(times, sol_pestim2.ensemblesol[2], label = "estimated y2")
 # comparing it with the original solution
 plot!(solution, labels = ["true x" "true y"])
 
-@show sol_pestim1.estimated_ode_params
-@show sol_pestim2.estimated_ode_params
+@show sol_pestim1.estimated_de_params
+@show sol_pestim2.estimated_de_params
 
-function fitz(u, p , t)
+function fitz(u, p, t)
     v, w = u[1], u[2]
-    a,b,τinv,l = p[1], p[2], p[3], p[4]
-    
-    dv = v - 0.33*v^3 -w + l
-    dw = τinv*(v +  a - b*w)
+    a, b, τinv, l = p[1], p[2], p[3], p[4]
+
+    dv = v - 0.33 * v^3 - w + l
+    dw = τinv * (v + a - b * w)
 
     return [dv, dw]
 end
 
-prob_ode_fitzhughnagumo = ODEProblem(fitz, [1.0,1.0], (0.0,10.0), [0.7,0.8,1/12.5,0.5])
+prob_ode_fitzhughnagumo = ODEProblem(
+    fitz, [1.0, 1.0], (0.0, 10.0), [0.7, 0.8, 1 / 12.5, 0.5])
 dt = 0.5
 sol = solve(prob_ode_fitzhughnagumo, Tsit5(), saveat = dt)
 
 sig = 0.20
 data = Array(sol)
-dataset = [data[1,:] .+ (sig .* rand(length(sol.t))), data[2, :] .+ (sig .* rand(length(sol.t))), sol.t]
-priors = [Normal(0.5,1.0), Normal(0.5,1.0), Normal(0.0,0.5), Normal(0.5,1.0)]
-
+dataset = [data[1, :] .+ (sig .* rand(length(sol.t))),
+    data[2, :] .+ (sig .* rand(length(sol.t))), sol.t]
+priors = [Normal(0.5, 1.0), Normal(0.5, 1.0), Normal(0.0, 0.5), Normal(0.5, 1.0)]
 
 plot(sol.t, dataset[1], label = "noisy x")
 plot!(sol.t, dataset[2], label = "noisy y")
@@ -98,7 +111,7 @@ chain = Lux.Chain(Lux.Dense(1, 10, tanh), Lux.Dense(10, 10, tanh),
 
 Adaptorkwargs = (Adaptor = AdvancedHMC.StanHMCAdaptor,
     Metric = AdvancedHMC.DiagEuclideanMetric, targetacceptancerate = 0.8)
-alg = BNNODE(chain;
+alg1 = BNNODE(chain;
 dataset = dataset,
 draw_samples = 1000,
 l2std = [0.1, 0.1],
@@ -107,12 +120,21 @@ priorsNNw = (0.01, 3.0),
 Adaptorkwargs = Adaptorkwargs,
 param = priors, progress = true)
 
-@time sol_pestim3 = solve(prob_ode_fitzhughnagumo, alg; saveat = dt)
-@time sol_pestim4 = solve(prob_ode_fitzhughnagumo, alg; estim_collocate = true, saveat = dt)
+alg2 = BNNODE(chain;
+    dataset = dataset,
+    draw_samples = 1000,
+    l2std = [0.1, 0.1],
+    phystd = [0.1, 0.1],
+    priorsNNw = (0.01, 3.0),
+    Adaptorkwargs = Adaptorkwargs,
+    param = priors, estim_collocate = true, progress = true)
+
+@time sol_pestim3 = solve(prob_ode_fitzhughnagumo, alg1; saveat = dt)
+@time sol_pestim4 = solve(prob_ode_fitzhughnagumo, alg2; saveat = dt)
 plot!(sol.t, sol_pestim3.ensemblesol[1], label = "estimated x1")
 plot!(sol.t, sol_pestim4.ensemblesol[1], label = "estimated x2")
 plot!(sol.t, sol_pestim3.ensemblesol[2], label = "estimated y1")
 plot!(sol.t, sol_pestim4.ensemblesol[2], label = "estimated y2")
 
-@show sol_pestim3.estimated_ode_params
-@show sol_pestim4.estimated_ode_params
+@show sol_pestim3.estimated_de_params
+@show sol_pestim4.estimated_de_params

From 8d4e1d5bda094d5f53af9b5f77a2cd9aad62d57a Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Tue, 26 Mar 2024 20:50:40 +0530
Subject: [PATCH 087/107] merge conflict resolution

---
 src/collocated_estim.jl | 27 ---------------------------
 1 file changed, 27 deletions(-)

diff --git a/src/collocated_estim.jl b/src/collocated_estim.jl
index a2f81b3ed9..3902f74a27 100644
--- a/src/collocated_estim.jl
+++ b/src/collocated_estim.jl
@@ -162,31 +162,4 @@ function calderivatives(prob, dataset)
     gradients = (A2 .- A1) ./ sqrt(eps(eltype(dataset[end][1])))
 
     return gradients
-end
-
-function calculate_derivatives(dataset)
-
-    # u = dataset[1]
-    # u1 = dataset[2]
-    # t = dataset[end]
-    # # control points
-    # n = Int(floor(length(t) / 10))
-    # # spline for datasetvalues(solution) 
-    # # interp = BSplineApprox(u, t, 4, 10, :Uniform, :Uniform)
-    # interp = CubicSpline(u, t)
-    # interp1 = CubicSpline(u1, t)
-    # # derivatives interpolation
-    # dx = t[2] - t[1]
-    # time = collect(t[1]:dx:t[end])
-    # smoothu = [interp(i) for i in time]
-    # smoothu1 = [interp1(i) for i in time]
-    # # derivative of the spline (must match function derivative) 
-    # û = tvdiff(smoothu, 20, 0.5, dx = dx, ε = 1)
-    # û1 = tvdiff(smoothu1, 20, 0.5, dx = dx, ε = 1)
-    # # tvdiff(smoothu, 100, 0.035, dx = dx, ε = 1)
-    # # FDM
-    # # û1 = diff(u) / dx
-    # # dataset[1] and smoothu are almost equal(rounding errors)
-    # return [û, û1] 
-
 end
\ No newline at end of file

From 0f3c8f03137a1a57bb37e1f4e38b3872b3c1bd92 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Tue, 26 Mar 2024 20:55:24 +0530
Subject: [PATCH 088/107] update NeuralPDE.jl

---
 src/NeuralPDE.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/NeuralPDE.jl b/src/NeuralPDE.jl
index c0798c6270..f927af98aa 100644
--- a/src/NeuralPDE.jl
+++ b/src/NeuralPDE.jl
@@ -89,6 +89,7 @@ include("BPINN_ode.jl")
 include("PDE_BPINN.jl")
 
 include("dgm.jl")
+include("collocated_estim.jl")
 
 export NNODE, NNDAE
 export BNNODE, ahmc_bayesian_pinn_ode, ahmc_bayesian_pinn_pde

From 9d084c1756ac2f75a49c75b3b14b2f38d94a96b5 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Tue, 26 Mar 2024 21:00:38 +0530
Subject: [PATCH 089/107] update NeuralPDE.jl

---
 src/NeuralPDE.jl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/NeuralPDE.jl b/src/NeuralPDE.jl
index f927af98aa..c0798c6270 100644
--- a/src/NeuralPDE.jl
+++ b/src/NeuralPDE.jl
@@ -89,7 +89,6 @@ include("BPINN_ode.jl")
 include("PDE_BPINN.jl")
 
 include("dgm.jl")
-include("collocated_estim.jl")
 
 export NNODE, NNDAE
 export BNNODE, ahmc_bayesian_pinn_ode, ahmc_bayesian_pinn_pde

From 9b0ed60769d75532a242759d87e296fd92d2c12c Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Tue, 26 Mar 2024 21:01:57 +0530
Subject: [PATCH 090/107] update NeuralPDE.jl

---
 src/NeuralPDE.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/NeuralPDE.jl b/src/NeuralPDE.jl
index c0798c6270..f927af98aa 100644
--- a/src/NeuralPDE.jl
+++ b/src/NeuralPDE.jl
@@ -89,6 +89,7 @@ include("BPINN_ode.jl")
 include("PDE_BPINN.jl")
 
 include("dgm.jl")
+include("collocated_estim.jl")
 
 export NNODE, NNDAE
 export BNNODE, ahmc_bayesian_pinn_ode, ahmc_bayesian_pinn_pde

From 12650346bf03d3d3a4686afcd03e0af2b09b03cf Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Fri, 29 Mar 2024 22:02:18 +0530
Subject: [PATCH 091/107] pmean for tests

---
 test/BPINN_PDEinvsol_tests.jl | 6 ++----
 test/BPINN_Tests.jl           | 8 ++++----
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index 631d54e015..53450f90da 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -216,8 +216,7 @@ sol1 = ahmc_bayesian_pinn_pde(pde_system,
     priorsNNw = (0.0, 1.0),
     saveats = [1 / 50.0],
     param = [LogNormal(6.0, 0.5)],
-    Dict_differentials = Dict_differentials,
-    progress = true)
+    Dict_differentials = Dict_differentials)
 
 sol2 = ahmc_bayesian_pinn_pde(pde_system,
     discretization;
@@ -226,8 +225,7 @@ sol2 = ahmc_bayesian_pinn_pde(pde_system,
     phystd = [0.01], l2std = [0.01],
     priorsNNw = (0.0, 1.0),
     saveats = [1 / 50.0],
-    param = [LogNormal(6.0, 0.5)],
-    progress = true)
+    param = [LogNormal(6.0, 0.5)])
 
 param = 2 * π
 ts = vec(sol1.timepoints[1])
diff --git a/test/BPINN_Tests.jl b/test/BPINN_Tests.jl
index 2fe347b3b4..6a32c560f0 100644
--- a/test/BPINN_Tests.jl
+++ b/test/BPINN_Tests.jl
@@ -53,8 +53,8 @@ Random.seed!(100)
     @test mean(abs.(physsol1 .- meanscurve)) < 0.005
 
     #--------------------- solve() call 
-    @test mean(abs.(x̂1 .- sol1lux.ensemblesol[1])) < 0.05
-    @test mean(abs.(physsol0_1 .- sol1lux.ensemblesol[1])) < 0.05
+    @test mean(abs.(x̂1 .- pmean(sol1lux.ensemblesol[1]))) < 0.025
+    @test mean(abs.(physsol0_1 .- pmean(sol1lux.ensemblesol[1]))) < 0.025
 end
 
 @testset "Example 2 - with parameter estimation" begin
@@ -122,7 +122,7 @@ end
     @test abs(p - mean([fhsamples[i][23] for i in 2000:length(fhsamples)])) < abs(0.35 * p)
 
     #-------------------------- solve() call  
-    @test mean(abs.(physsol1_1 .- sol2lux.ensemblesol[1])) < 8e-2
+    @test mean(abs.(physsol1_1 .- pmean(sol2lux.ensemblesol[1]))) < 8e-2
 
     # ESTIMATED ODE PARAMETERS (NN1 AND NN2)
     @test abs(p - sol2lux.estimated_de_params[1]) < abs(0.15 * p)
@@ -211,7 +211,7 @@ end
 
     #-------------------------- solve() call 
     # (lux chain)
-    @test mean(abs.(physsol2 .- sol3lux_pestim.ensemblesol[1])) < 0.15
+    @test mean(abs.(physsol2 .- pmean(sol3lux_pestim.ensemblesol[1]))) < 0.15
     # estimated parameters(lux chain)
     param1 = sol3lux_pestim.estimated_de_params[1]
     @test abs(param1 - p) < abs(0.45 * p)

From e8749330fb3ba040dc06f5d0a9ca11db2946e328 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Fri, 29 Mar 2024 22:11:25 +0530
Subject: [PATCH 092/107] .

---
 test/BPINN_PDEinvsol_tests.jl | 1113 ++++++++++++++++++++++++++++++++-
 1 file changed, 1112 insertions(+), 1 deletion(-)

diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index 53450f90da..797361e7ac 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -242,4 +242,1115 @@ u_predict = pmean(sol2.ensemblesol[1])
 
 @test u_predict≈u_real atol=1.5
 @test mean(u_predict .- u_real) < 0.1
-@test sol1.estimated_de_params[1]≈param atol=param * 0.3
\ No newline at end of file
+@test sol1.estimated_de_params[1]≈param atol=param * 0.3
+
+
+
+println("Example 3: Lotka Volterra with New parameter estimation")
+@parameters t α β γ δ
+@variables x(..) y(..)
+
+Dt = Differential(t)
+eqs = [Dt(x(t))*α  ~ x(t) - β * x(t) * y(t), Dt(y(t))*γ  ~ δ * x(t) * y(t) - y(t)]
+bcs = [x(0) ~ 1.0, y(0) ~ 1.0]
+domains = [t ∈ Interval(0.0, 7.0)]
+
+# Define the parameters' values
+# params = [α => 1.0, β => 0.5, γ => 0.5, δ => 1.0]
+# p = [1.5, 1.0, 3.0, 1.0]
+
+chainl = [
+    Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),Lux.Dense(6, 1)),
+    Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),Lux.Dense(6, 1))
+]
+
+initl, st = Lux.setup(Random.default_rng(), chainl[1])
+initl1, st1 = Lux.setup(Random.default_rng(), chainl[2])
+
+using NeuralPDE, Lux, OrdinaryDiffEq, Distributions, Random
+
+function lotka_volterra(u, p, t)
+    # Model parameters. 
+    α, β, γ, δ = p
+    # Current state.
+    x, y = u
+
+    # Evaluate differential equations.
+    dx = (α - β * y) * x # prey
+    dy = (δ * x - γ) * y # predator
+
+    return [dx, dy]
+end
+# initial-value problem.
+u0 = [1.0, 1.0]
+# p = [2/3, 2/3, 1/3.0, 1/3.0]
+p = [1.5, 1.0, 3.0, 1.0]
+tspan = (0.0, 7.0)
+prob = ODEProblem(lotka_volterra, u0, tspan, p)
+dt = 0.01
+solution = solve(prob, Tsit5(); saveat = dt)
+
+
+# function moving_average_smoothing(data::Vector{T}, window_size::Int) where {T}
+#     smoothed_data = similar(data, T, length(data))
+
+#     for i in 1:length(data)
+#         start_idx = max(1, i - window_size)
+#         end_idx = min(length(data), i + window_size)
+#         smoothed_data[i] = mean(data[start_idx:end_idx])
+#     end
+
+#     return smoothed_data'
+# end
+
+# Extract solution
+time = solution.t
+u = hcat(solution.u...)
+time1=solution.t
+u_noisy = u .+ u .* (0.3 .* randn(size(u)))
+
+plot(time,u[1,:])
+plot!(time,u[2,:])
+scatter!(time1,u_noisy[1,:])
+scatter!(time1,u_noisy[2,:])
+
+# window_size = 5
+# smoothed_datasets = [moving_average_smoothing(u1[i, :], window_size)
+#                      for i in 1:length(solution.u[1])]
+# u2 = vcat(smoothed_datasets[1], smoothed_datasets[2])
+# Randomly select some points from the solution
+num_points = 150  # Number of points to select
+selected_indices = rand(1:size(u_noisy, 2), num_points)
+upoints = [u_noisy[:, i] for i in selected_indices]
+timepoints = [time[i] for i in selected_indices]
+temp=hcat(upoints...)
+dataset = [hcat(temp[i, :], timepoints) for i in 1:2]
+
+# plot(time,u[1,:])
+# plot!(time,u[2,:])
+
+discretization = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
+    dataset = [dataset, nothing])
+
+@named pde_system = PDESystem(eqs,
+    bcs,
+    domains,
+    [t],
+    [x(t), y(t)],
+    [α, β, γ, δ],
+    defaults = Dict([α =>2, β => 3, γ =>3, δ =>2]))
+
+# creating dictionary for masking equations
+eqs = pde_system.eqs
+Dict_differentials = Dict()
+exps = toexpr.(eqs)
+nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+sol3 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], progress = true)
+
+# time
+# dataset
+# chainl[1](time', sol3.estimated_nn_params[1], st)[1][1,:]
+# plot!(time1, chainl[1](time1', sol3.estimated_nn_params[1], st)[1][1,:])
+# plot!(time1, chainl[2](time1', sol3.estimated_nn_params[2], st)[1][1,:])
+# plot!(time1, chainl[1](time1', sol5.estimated_nn_params[1], st)[1][1,:])
+# plot!(time1, chainl[2](time1', sol5.estimated_nn_params[2], st)[1][1,:])
+# time1 = collect(0.0:(1 / 100.0):8.0)
+
+sol4 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], progress = true
+)
+
+
+sol5_00 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.15, 0.15],
+    phystd = [0.15, 0.15], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol5_0 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.05, 0.05],
+    phystd = [0.05, 0.05], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.1, 0.1],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol5 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.2, 0.2],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+# 100 points(sol5_2 vs sol3)
+sol5_2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.1, 0.1],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+# 100 points(sol5_2 vs sol3)
+sol5_2_1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.08, 0.08],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+# 100 points(sol5_2 vs sol3)
+sol5_2_2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.2, 0.2],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+# 50 datapoint 0-5 sol5 vs sol4
+# julia> sol4.estimated_de_params
+# 4-element Vector{Particles{Float64, 234}}:
+#  0.549 ± 0.0058
+#  0.71 ± 0.0042
+#  0.408 ± 0.0063
+#  0.355 ± 0.0015
+
+# julia> sol5.estimated_de_params
+# 4-element Vector{Particles{Float64, 234}}:
+#  0.604 ± 0.0052
+#  0.702 ± 0.0034
+#  0.346 ± 0.0037
+#  0.335 ± 0.0013
+
+# 100 datapoint 0-5 sol5_2 vs sol3
+# julia> sol3.estimated_de_params
+# 4-element Vector{Particles{Float64, 234}}:
+#  0.598 ± 0.0037
+#  0.711 ± 0.0027
+#  0.399 ± 0.0032
+#  0.333 ± 0.0011
+
+# julia> sol5_2.estimated_de_params
+# 4-element Vector{Particles{Float64, 234}}:
+#  0.604 ± 0.0035
+#  0.686 ± 0.0026
+#  0.395 ± 0.0029
+#  0.328 ± 0.00095
+
+# timespan for full dataset (0-8)
+sol6 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1, 2),
+        Normal(1, 1),
+        Normal(1, 2),
+        Normal(1, 1)
+    ], progress = true)
+
+sol5_3 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1, 2),
+        Normal(1, 1),
+        Normal(1, 2),
+        Normal(1, 1)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol5_4 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.2, 0.2],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1, 2),
+        Normal(1, 1),
+        Normal(1, 2),
+        Normal(1, 1)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol5_5 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1, 2),
+        Normal(1, 1),
+        Normal(1, 2),
+        Normal(1, 1)
+    ], progress = true
+)
+
+sol7 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1, 2),
+        Normal(1, 1),
+        Normal(1, 2),
+        Normal(1, 1)
+    ], Dict_differentials = Dict_differentials, progress = true)
+
+sol5_5_1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], progress = true
+)
+
+sol7_1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true)
+    
+sol7_2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.1, 0.1],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true)
+
+sol7_3 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.2, 0.2],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true)
+ 
+sol7_4 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true)
+
+lpfun = function f(chain::Chains) # function to compute the logpdf values
+    niter, nparams, nchains = size(chain)
+    lp = zeros(niter + nchains) # resulting logpdf values
+    for i = 1:nparams
+        lp += logpdf(MvNormal(Array(chain[:,i,:])) , dataset[1][:,1]')
+        lp += logpdf(MvNormal(Array(chain[:,i,:])) , dataset[1][:,2]')
+    end
+    return lp
+end
+
+DIC, pD = dic(sol3.original.mcmc_chain, lpfun)
+DIC1, pD1 = dic(sol4.original.mcmc_chain, lpfun)
+
+size(sol3.original.mcmc_chain)
+Array(sol3.original.mcmc_chain[1,:,:])
+length(sol3.estimated_nn_params[1])
+chainl[1](time', sol3.estimated_nn_params[1], st)[1]
+
+data = [hcat(calculate_derivatives2(dataset[i][:, 2], dataset[1][:, 1]),dataset[i][:, 2]) for i in eachindex(dataset)]
+dataset[1][:,1]
+dataset[2]
+plot!(dataset[1][:,2],dataset[1][:,1])
+eqs
+sol5 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 200,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.02, 0.02],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(3, 2),
+        Normal(3, 2)
+        # LogNormal(1, 2),
+        # LogNormal(1, 2),
+        # LogNormal(1, 2),
+        # LogNormal(1, 2)
+    ], progress = true)
+
+# plot(time, chainl[1](time', sol2.estimated_nn_params[1], st)[1])
+# plot!(time, chainl[2](time', sol2.estimated_nn_params[2], st)[1])
+
+sol6 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 200,
+    bcstd = [0.5, 0.5],
+    phystd = [0.5, 0.5], l2std = [0.02, 0.02],
+    priorsNNw = (0.0, 5.0), phystdnew = [0.5, 0.5],
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),aa
+    param = [
+        # LogNormal(2, 3),LogNormal(2, 3),LogNormal(2, 3),LogNormal(2, 3)
+        # Normal(3, 2),
+        # Normal(4, 2),
+        Normal(3, 2),
+        Normal(3, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+function calculate_derivatives2(indvar,depvar)
+    x̂, time = indvar,depvar
+    num_points = length(x̂)
+    # Initialize an array to store the derivative values.
+    derivatives = similar(x̂)
+
+    for i in 2:(num_points - 1)
+        # Calculate the first-order derivative using central differences.
+        Δt_forward = time[i + 1] - time[i]
+        Δt_backward = time[i] - time[i - 1]
+
+        derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
+
+        derivatives[i] = derivative
+    end
+
+    # Derivatives at the endpoints can be calculated using forward or backward differences.
+    derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
+    derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
+    return derivatives
+end
+dataset[1]
+dataset[2]
+dataset[1][:,1]=calculate_derivatives2(dataset[1][:,2], dataset[1][:,1])
+dataset[2][:,1]=calculate_derivatives2(dataset[2][:,2], dataset[2][:,1])
+dataset[1]
+dataset[2]
+sol7 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 200,
+    bcstd = [0.5, 0.5],
+    phystd = [0.5, 0.5], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(0, 2),
+        Normal(0, 2)
+        # LogNormal(1, 2),
+        # LogNormal(1, 2),
+        # LogNormal(1, 2),
+        # LogNormal(1, 2)
+    ], progress = true)
+
+# plot(time, chainl[1](time', sol2.estimated_nn_params[1], st)[1])
+# plot!(time, chainl[2](time', sol2.estimated_nn_params[2], st)[1])
+
+sol8 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), phystdnew = [0.1, 0.1],
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),aa
+    param = [
+        # LogNormal(2, 3),LogNormal(2, 3),LogNormal(2, 3),LogNormal(2, 3)
+        # Normal(3, 2),
+        # Normal(4, 2),
+        Normal(0, 2),
+        Normal(0, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+timepoints = collect(0.0:(1 / 100.0):9.0)
+plot!(timepoints', chainl[1](timepoints', sol5_4.estimated_nn_params[1], st)[1])
+plot!(timepoints, chainl[2](timepoints', sol5_4.estimated_nn_params[2], st)[1])
+
+using Plots, StatsPlots
+plotly()
+
+plot(time, u[1, :])
+plot!(time, u[2, :])
+scatter!(time, u_noisy[1, :])
+scatter!(time, u_noisy[2, :])
+scatter!(discretization.dataset[1][1][:,2], discretization.dataset[1][1][:,1])
+scatter!(discretization.dataset[1][2][:,2], discretization.dataset[1][2][:,1])
+
+# plot28(sol4 seems better vs sol3 plots, params seems similar)
+plot!(sol3.timepoints[1]', sol3.ensemblesol[1],legend=nothing)
+plot!(sol3.timepoints[2]', sol3.ensemblesol[2])
+plot!(sol4.timepoints[1]', sol4.ensemblesol[1])
+plot!(sol4.timepoints[2]', sol4.ensemblesol[2])
+
+plot!(sol4_2.timepoints[1]', sol4_2.ensemblesol[1],legend=nothing)
+plot!(sol4_2.timepoints[2]', sol4_2.ensemblesol[2])
+plot!(sol5_2.timepoints[1]', sol5_2.ensemblesol[1],legend=nothing)
+plot!(sol5_2.timepoints[2]', sol5_2.ensemblesol[2])
+
+plot!(sol4_3.timepoints[1]', sol4_3.ensemblesol[1],legend=nothing)
+plot!(sol4_3.timepoints[2]', sol4_3.ensemblesol[2])
+plot!(sol5_3.timepoints[1]', sol5_3.ensemblesol[1])
+plot!(sol5_3.timepoints[2]', sol5_3.ensemblesol[2])
+plot!(sol5_4.timepoints[1]', sol5_4.ensemblesol[1],legend=nothing)
+plot!(sol5_4.timepoints[2]', sol5_4.ensemblesol[2])
+
+
+# plot 36 sol4 vs sol5(params sol4 better, but plots sol5 "looks" better),plot 44(sol5 better than sol6 overall)
+plot!(sol5.timepoints[1]', sol5.ensemblesol[1],legend=nothing)
+plot!(sol5.timepoints[2]', sol5.ensemblesol[2])
+plot!(sol6.timepoints[1]', sol6.ensemblesol[1])
+plot!(sol6.timepoints[2]', sol6.ensemblesol[2])
+
+# plot52 sol7 vs sol5(sol5 overall better plots, params?)
+plot!(sol7.timepoints[1]', sol7.ensemblesol[1])
+plot!(sol7.timepoints[2]', sol7.ensemblesol[2])
+
+# sol8,sol8_2,sol9,sol9_2 bad
+plot!(sol8.timepoints[1]', sol8.ensemblesol[1])
+plot!(sol8.timepoints[2]', sol8.ensemblesol[2])
+plot!(sol8_2.timepoints[1]', sol8_2.ensemblesol[1])
+plot!(sol8_2.timepoints[2]', sol8_2.ensemblesol[2])
+
+plot!(sol9.timepoints[1]', sol9.ensemblesol[1])
+plot!(sol9.timepoints[2]', sol9.ensemblesol[2])
+plot!(sol9_2.timepoints[1]', sol9_2.ensemblesol[1])
+plot!(sol9_2.timepoints[2]', sol9_2.ensemblesol[2])
+
+
+plot!(sol5_5.timepoints[1]', sol5_5.ensemblesol[1])
+plot!(sol5_5.timepoints[2]', sol5_5.ensemblesol[2],legend=nothing)
+
+plot!(sol5_5_1.timepoints[1]', sol5_5_1.ensemblesol[1])
+plot!(sol5_5_1.timepoints[2]', sol5_5_1.ensemblesol[2],legend=nothing)
+plot!(sol7_1.timepoints[1]', sol7_1.ensemblesol[1])
+plot!(sol7_1.timepoints[2]', sol7_1.ensemblesol[2])
+
+plot!(sol7_4.timepoints[1]', sol7_4.ensemblesol[1])
+plot!(sol7_4.timepoints[2]', sol7_4.ensemblesol[2])
+
+plot!(sol5_2_1.timepoints[1]', sol5_2_1.ensemblesol[1],legend=nothing)
+plot!(sol5_2_1.timepoints[2]', sol5_2_1.ensemblesol[2])
+plot!(sol5_2_2.timepoints[1]', sol5_2_2.ensemblesol[1],legend=nothing)
+plot!(sol5_2_2.timepoints[2]', sol5_2_2.ensemblesol[2])
+
+plot!(sol5_0.timepoints[1]', sol5_0.ensemblesol[1])
+plot!(sol5_0.timepoints[2]', sol5_0.ensemblesol[2],legend=nothing)
+
+plot!(sol5_00.timepoints[1]', sol5_00.ensemblesol[1])
+plot!(sol5_00.timepoints[2]', sol5_00.ensemblesol[2],legend=nothing)
+
+# test with lower number of points
+# test same calls 2 times or more
+# consider full range dataset case
+# combination of all above
+
+# run 1 100 iters
+sol5.estimated_de_params
+sol6.estimated_de_params
+
+# run 2 200 iters
+sol5.estimated_de_params
+sol6.estimated_de_params
+
+# run 2 200 iters
+sol3.estimated_de_params
+sol4.estimated_de_params
+
+# p = [2/3, 2/3, 1/3, 1/3]
+sol3.estimated_de_params
+sol4.estimated_de_params
+dataset[1]
+eqs
+α, β, γ, δ = p
+p
+#  1.0
+#  0.6666666666666666
+#  1.0
+#  0.33333333333333333
+
+1/a
+1/c
+eqs
+using StatsPlots
+plotly()
+plot(sol3.original.mcmc_chain)
+plot(sol4.original.mcmc_chain)
+
+# 4-element Vector{Particles{Float64, 34}}:
+#  1.23 ± 0.022
+#  0.858 ± 0.011
+#  3.04 ± 0.079
+#  1.03 ± 0.024
+# 4-element Vector{Particles{Float64, 34}}:
+#  1.2 ± 0.0069
+#  0.835 ± 0.006
+#  3.22 ± 0.01
+#  1.08 ± 0.0053
+# # plot(time', chainl[1](time', sol1.estimated_nn_params[1], st)[1])
+# # plot!(time, chainl[2](time', sol1.estimated_nn_params[2], st)[1])
+
+# sol3 = ahmc_bayesian_pinn_pde(pde_system,
+#     discretization;
+#     draw_samples = 500,
+#     bcstd = [0.05, 0.05],
+#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
+#     phystdnew = [0.5, 0.5],
+#     #  Kernel = AdvancedHMC.NUTS(0.8),
+#     priorsNNw = (0.0, 10.0),
+#     saveats = [1 / 50.0],
+#     param = [
+#         Normal(0.0, 2),
+#         Normal(0.0, 2),
+#         Normal(0.0, 2),
+#         Normal(0.0, 2)
+#     ],
+#     Dict_differentials = Dict_differentials, progress = true)
+
+# sol = ahmc_bayesian_pinn_pde(pde_system,
+#     discretization;
+#     draw_samples = 500,
+#     bcstd = [0.05, 0.05],
+#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
+#     priorsNNw = (0.0, 10.0),
+#     saveats = [1 / 50.0],
+#     # Kernel = AdvancedHMC.NUTS(0.8),
+#     param = [
+#         Normal(1.0, 2),
+#         Normal(1.0, 2),
+#         Normal(1.0, 2),
+#         Normal(1.0, 2)
+#     ], progress = true)
+
+# plot!(sol.timepoints[1]', sol.ensemblesol[1])
+# plot!(sol.timepoints[2]', sol.ensemblesol[2])
+
+# sol1 = ahmc_bayesian_pinn_pde(pde_system,
+#     discretization;
+#     draw_samples = 500,
+#     bcstd = [0.05, 0.05],
+#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
+#     phystdnew = [0.5, 0.5],
+#     #  Kernel = AdvancedHMC.NUTS(0.8),
+#     priorsNNw = (0.0, 10.0),
+#     saveats = [1 / 50.0],
+#     param = [
+#         Normal(1.0, 2),
+#         Normal(1.0, 2),
+#         Normal(1.0, 2),
+#         Normal(1.0, 2)
+#     ],
+#     Dict_differentials = Dict_differentials, progress = true)
+
+# plot!(sol1.timepoints[1]', sol1.ensemblesol[1])
+# plot!(sol1.timepoints[2]', sol1.ensemblesol[2])
+
+sol = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 500,
+    bcstd = [0.05, 0.05],
+    phystd = [0.005, 0.005], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 10.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(1.0, 2),
+        Normal(1.0, 2),
+        Normal(1.0, 2),
+        Normal(1.0, 2)
+    ])
+
+# plot!(sol.timepoints[1]', sol.ensemblesol[1])
+# plot!(sol.timepoints[2]', sol.ensemblesol[2])
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 500,
+    bcstd = [0.05, 0.05],
+    phystd = [0.005, 0.005], l2std = [0.1, 0.1],
+    phystdnew = [0.5, 0.5],
+    #  Kernel = AdvancedHMC.NUTS(0.8),
+    priorsNNw = (0.0, 10.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1.0, 2),
+        Normal(1.0, 2),
+        Normal(1.0, 2),
+        Normal(1.0, 2)
+    ],
+    Dict_differentials = Dict_differentials)
+
+param = 2 * π
+ts = vec(sol1.timepoints[1])
+u_real = [analytic_sol_func1(0.0, t) for t in ts]
+u_predict = pmean(sol1.ensemblesol[1])
+
+@test u_predict≈u_real atol=1.5
+@test mean(u_predict .- u_real) < 0.1
+@test sol1.estimated_de_params[1]≈param atol=param * 0.3
+
+# points1 = []
+# for eq_arg in eq_args
+#     a = []
+#     # for each (depvar,[indvar1..]) if indvari==indvar (eq_arg)
+#     for i in eachindex(symbols_input)
+#         if symbols_input[i][2] == eq_arg
+#             # include domain points of that depvar
+#             # each loss equation take domain matrix [points..;points..]
+#             push!(a, train_sets[i][:, 2:end]')
+#         end
+#     end
+#     # vcat as new row for next equation
+#     push!(points1, vcat(a...))
+# end
+# println(points1 == points)
+
+# using NeuralPDE, Flux, Lux, ModelingToolkit, LinearAlgebra, AdvancedHMC
+# import ModelingToolkit: Interval, infimum, supremum, Distributions
+# using Plots, MonteCarloMeasurements
+
+# @parameters x, t, α
+# @variables u(..)
+# Dt = Differential(t)
+# Dx = Differential(x)
+# Dx2 = Differential(x)^2
+# Dx3 = Differential(x)^3
+# Dx4 = Differential(x)^4
+
+# # α = 1
+# β = 4
+# γ = 1
+# eq = Dt(u(x, t)) + u(x, t) * Dx(u(x, t)) + α * Dx2(u(x, t)) + β * Dx3(u(x, t)) + γ * Dx4(u(x, t)) ~ 0
+
+# u_analytic(x, t; z = -x / 2 + t) = 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
+# du(x, t; z = -x / 2 + t) = 15 / 2 * (tanh(z) + 1) * (3 * tanh(z) - 1) * sech(z)^2
+
+# bcs = [u(x, 0) ~ u_analytic(x, 0),
+#     u(-10, t) ~ u_analytic(-10, t),
+#     u(10, t) ~ u_analytic(10, t),
+#     Dx(u(-10, t)) ~ du(-10, t),
+#     Dx(u(10, t)) ~ du(10, t)]
+
+# # Space and time domains
+# domains = [x ∈ Interval(-10.0, 10.0),
+#     t ∈ Interval(0.0, 1.0)]
+
+# # Discretization
+# dx = 0.4;
+# dt = 0.2;
+
+# # Function to compute analytical solution at a specific point (x, t)
+# function u_analytic_point(x, t)
+#     z = -x / 2 + t
+#     return 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
+# end
+
+# # Function to generate the dataset matrix
+# function generate_dataset_matrix(domains, dx, dt)
+#     x_values = -10:dx:10
+#     t_values = 0.0:dt:1.0
+
+#     dataset = []
+
+#     for t in t_values
+#         for x in x_values
+#             u_value = u_analytic_point(x, t)
+#             push!(dataset, [u_value, x, t])
+#         end
+#     end
+
+#     return vcat([data' for data in dataset]...)
+# end
+
+# datasetpde = [generate_dataset_matrix(domains, dx, dt)]
+
+# # noise to dataset
+# noisydataset = deepcopy(datasetpde)
+# noisydataset[1][:, 1] = noisydataset[1][:, 1] .+
+#                         randn(size(noisydataset[1][:, 1])) .* 5 / 100 .*
+#                         noisydataset[1][:, 1]
+
+# # plot(datasetpde[1][:, 2], datasetpde[1][:, 1], title = "Dataset from Analytical Solution")
+# # plot!(noisydataset[1][:, 2], noisydataset[1][:, 1])
+
+# # Neural network
+# chain = Lux.Chain(Lux.Dense(2, 8, Lux.tanh),
+#     Lux.Dense(8, 8, Lux.tanh),
+#     Lux.Dense(8, 1))
+
+# discretization = NeuralPDE.BayesianPINN([chain],
+#     GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset, nothing])
+
+# @named pde_system = PDESystem(eq,
+#     bcs,
+#     domains,
+#     [x, t],
+#     [u(x, t)],
+#     [α],
+#     defaults = Dict([α => 0.5]))
+
+# sol1 = ahmc_bayesian_pinn_pde(pde_system,
+#     discretization;
+#     draw_samples = 100,
+#     bcstd = [0.2, 0.2, 0.2, 0.2, 0.2],
+#     phystd = [1.0], l2std = [0.05], param = [Distributions.LogNormal(0.5, 2)],
+#     priorsNNw = (0.0, 10.0),
+#     saveats = [1 / 100.0, 1 / 100.0], progress = true)
+
+# eqs = pde_system.eqs
+# Dict_differentials = Dict()
+# exps = toexpr.(eqs)
+# nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+# sol2 = ahmc_bayesian_pinn_pde(pde_system,
+#     discretization;
+#     draw_samples = 100,
+#     bcstd = [0.2, 0.2, 0.2, 0.2, 0.2],
+#     phystd = [1.0], phystdnew = [0.05], l2std = [0.05],
+#     param = [Distributions.LogNormal(0.5, 2)],
+#     priorsNNw = (0.0, 10.0),
+#     saveats = [1 / 100.0, 1 / 100.0], Dict_differentials = Dict_differentials,
+#     progress = true)
+
+# phi = discretization.phi[1]
+# xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+#           for (d, dx) in zip(domains, [dx / 10, dt])]
+# u_predict = [[first(pmean(phi([x, t], sol1.estimated_nn_params[1]))) for x in xs]
+#              for t in ts]
+# u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+# diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1.estimated_nn_params[1]))))
+#            for x in xs]
+#           for t in ts]
+
+# # p1 = plot(xs, u_predict, title = "predict")
+# # p2 = plot(xs, u_real, title = "analytic")
+# # p3 = plot(xs, diff_u, title = "error")
+# # plot(p1, p2, p3)
+
+# phi = discretization.phi[1]
+# xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+#           for (d, dx) in zip(domains, [dx / 10, dt])]
+# u_predict = [[first(pmean(phi([x, t], sol2.estimated_nn_params[1]))) for x in xs]
+#              for t in ts]
+# u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+# diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol2.estimated_nn_params[1]))))
+#            for x in xs]
+#           for t in ts]
+
+# # p1 = plot(xs, u_predict, title = "predict")
+# # p2 = plot(xs, u_real, title = "analytic")
+# # p3 = plot(xs, diff_u, title = "error")
+# # plot(p1, p2, p3)
+
+@parameters t, p
+@variables u(..)
+
+Dt = Differential(t)
+eqs = Dt(u(t)) - cos(p * t) ~ 0
+bcs = [u(0) ~ 0.0]
+domains = [t ∈ Interval(0.0, 2.0)]
+
+chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
+initl, st = Lux.setup(Random.default_rng(), chainl)
+
+@named pde_system = PDESystem(eqs,
+    bcs,
+    domains,
+    [t],
+    [u(t)],
+    [p],
+    defaults = Dict([p => 4.0]))
+
+analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
+timepoints = collect(0.0:(1 / 100.0):2.0)
+u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
+u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
+dataset = [hcat(u1, timepoints)]
+
+discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
+    dataset = [dataset, nothing])
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 1500,
+    bcstd = [0.05],
+    phystd = [0.01], l2std = [0.01],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 50.0],
+    param = [LogNormal(4.0, 2)], progress = true)
+
+param = 2 * π
+ts = vec(sol1.timepoints[1])
+u_real = [analytic_sol_func1(0.0, t) for t in ts]
+u_predict = pmean(sol1.ensemblesol[1])
+
+@test u_predict≈u_real atol=0.1
+@test mean(u_predict .- u_real) < 0.01
+@test sol1.estimated_de_params[1]≈param atol=0.1
+sol1.estimated_de_params[1]
+
+eqs = pde_system.eqs
+Dict_differentials = Dict()
+exps = toexpr.(eqs)
+nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+sol2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 1500,
+    bcstd = [0.05],
+    phystd = [0.01], l2std = [0.02], phystdnew = [0.02],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 50.0],
+    param = [LogNormal(4.0, 2)],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+param = 2 * π
+ts_2 = vec(sol2.timepoints[1])
+u_real_2 = [analytic_sol_func1(0.0, t) for t in ts]
+u_predict_2 = pmean(sol2.ensemblesol[1])
+
+@test u_predict_2≈u_real_2 atol=0.1
+@test mean(u_predict_2 .- u_real_2) < 0.01
+@test sol2.estimated_de_params[1]≈param atol=0.1
+sol2.estimated_de_params[1]
+
+plot(ts_2, u_predict_2)
+plot!(ts_2, u_real_2)
+
+@parameters t, σ_
+@variables x(..), y(..), z(..)
+Dt = Differential(t)
+eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
+    Dt(y(t)) ~ x(t) * (28.0 - z(t)) - y(t),
+    Dt(z(t)) ~ x(t) * y(t) - 8 / 3 * z(t)]
+
+bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
+domains = [t ∈ Interval(0.0, 1.0)]
+
+input_ = length(domains)
+n = 7
+chain = [
+    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+        Lux.Dense(n, 1)),
+    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+        Lux.Dense(n, 1)),
+    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+        Lux.Dense(n, 1))
+]
+
+#Generate Data
+function lorenz!(du, u, p, t)
+    du[1] = 10.0 * (u[2] - u[1])
+    du[2] = u[1] * (28.0 - u[3]) - u[2]
+    du[3] = u[1] * u[2] - (8 / 3) * u[3]
+end
+
+u0 = [1.0; 0.0; 0.0]
+tspan = (0.0, 1.0)
+prob = ODEProblem(lorenz!, u0, tspan)
+sol = solve(prob, Tsit5(), dt = 0.01, saveat = 0.05)
+ts = sol.t
+us = hcat(sol.u...)
+us = us .+ ((0.05 .* randn(size(us))) .* us)
+ts_ = hcat(sol(ts).t...)[1, :]
+dataset = [hcat(us[i, :], ts_) for i in 1:3]
+
+discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true,
+    dataset = [dataset, nothing])
+
+@named pde_system = PDESystem(eqs, bcs, domains,
+    [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]]))
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 100,
+    bcstd = [0.3, 0.3, 0.3],
+    phystd = [0.1, 0.1, 0.1],
+    l2std = [1, 1, 1],
+    priorsNNw = (0.0, 1.0),
+    saveats = [0.01],
+    param = [Normal(14.0, 2)], progress = true)
+
+idealp = 10.0
+p_ = sol1.estimated_de_params[1]
+@test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
+# @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
+
+@parameters x y
+@variables u(..)
+Dxx = Differential(x)^2
+Dyy = Differential(y)^2
+
+# 2D PDE
+eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y)
+
+# Boundary conditions
+bcs = [u(0, y) ~ 0.0, u(1, y) ~ 0.0,
+    u(x, 0) ~ 0.0, u(x, 1) ~ 0.0]
+
+# Space and time domains
+domains = [x ∈ Interval(0.0, 1.0),
+    y ∈ Interval(0.0, 1.0)]
+
+# Neural network
+dim = 2 # number of dimensions
+chain = Lux.Chain(Lux.Dense(dim, 9, Lux.σ), Lux.Dense(9, 9, Lux.σ), Lux.Dense(9, 1))
+
+# Discretization
+dx = 0.04
+discretization = BayesianPINN([chain], GridTraining(dx), dataset = [[dataset], nothing])
+
+@named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
+
+eqs = pde_system.eqs
+Dict_differentials = Dict()
+exps = toexpr.(eqs)
+nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 5,
+    bcstd = [0.01, 0.01, 0.01, 0.01],
+    phystd = [0.005],
+    priorsNNw = (0.0, 2.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+xs = sol1.timepoints[1]
+sol1.ensemblesol[1]
+analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2)
+
+dataset = hcat(u_real, xs')
+u_predict = pmean(sol1.ensemblesol[1])
+u_real = [analytic_sol_func(xs[:, i][1], xs[:, i][2]) for i in 1:length(xs[1, :])]
+@test u_predict≈u_real atol=0.8
\ No newline at end of file

From ae387016880ff476d81c0e2a5ba1cffa06ea1a2a Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Fri, 29 Mar 2024 22:25:27 +0530
Subject: [PATCH 093/107] update BPINN_PDEinvsol_tests.jl

---
 test/BPINN_PDEinvsol_tests.jl | 1113 +--------------------------------
 1 file changed, 1 insertion(+), 1112 deletions(-)

diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index 797361e7ac..53450f90da 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -242,1115 +242,4 @@ u_predict = pmean(sol2.ensemblesol[1])
 
 @test u_predict≈u_real atol=1.5
 @test mean(u_predict .- u_real) < 0.1
-@test sol1.estimated_de_params[1]≈param atol=param * 0.3
-
-
-
-println("Example 3: Lotka Volterra with New parameter estimation")
-@parameters t α β γ δ
-@variables x(..) y(..)
-
-Dt = Differential(t)
-eqs = [Dt(x(t))*α  ~ x(t) - β * x(t) * y(t), Dt(y(t))*γ  ~ δ * x(t) * y(t) - y(t)]
-bcs = [x(0) ~ 1.0, y(0) ~ 1.0]
-domains = [t ∈ Interval(0.0, 7.0)]
-
-# Define the parameters' values
-# params = [α => 1.0, β => 0.5, γ => 0.5, δ => 1.0]
-# p = [1.5, 1.0, 3.0, 1.0]
-
-chainl = [
-    Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),Lux.Dense(6, 1)),
-    Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),Lux.Dense(6, 1))
-]
-
-initl, st = Lux.setup(Random.default_rng(), chainl[1])
-initl1, st1 = Lux.setup(Random.default_rng(), chainl[2])
-
-using NeuralPDE, Lux, OrdinaryDiffEq, Distributions, Random
-
-function lotka_volterra(u, p, t)
-    # Model parameters. 
-    α, β, γ, δ = p
-    # Current state.
-    x, y = u
-
-    # Evaluate differential equations.
-    dx = (α - β * y) * x # prey
-    dy = (δ * x - γ) * y # predator
-
-    return [dx, dy]
-end
-# initial-value problem.
-u0 = [1.0, 1.0]
-# p = [2/3, 2/3, 1/3.0, 1/3.0]
-p = [1.5, 1.0, 3.0, 1.0]
-tspan = (0.0, 7.0)
-prob = ODEProblem(lotka_volterra, u0, tspan, p)
-dt = 0.01
-solution = solve(prob, Tsit5(); saveat = dt)
-
-
-# function moving_average_smoothing(data::Vector{T}, window_size::Int) where {T}
-#     smoothed_data = similar(data, T, length(data))
-
-#     for i in 1:length(data)
-#         start_idx = max(1, i - window_size)
-#         end_idx = min(length(data), i + window_size)
-#         smoothed_data[i] = mean(data[start_idx:end_idx])
-#     end
-
-#     return smoothed_data'
-# end
-
-# Extract solution
-time = solution.t
-u = hcat(solution.u...)
-time1=solution.t
-u_noisy = u .+ u .* (0.3 .* randn(size(u)))
-
-plot(time,u[1,:])
-plot!(time,u[2,:])
-scatter!(time1,u_noisy[1,:])
-scatter!(time1,u_noisy[2,:])
-
-# window_size = 5
-# smoothed_datasets = [moving_average_smoothing(u1[i, :], window_size)
-#                      for i in 1:length(solution.u[1])]
-# u2 = vcat(smoothed_datasets[1], smoothed_datasets[2])
-# Randomly select some points from the solution
-num_points = 150  # Number of points to select
-selected_indices = rand(1:size(u_noisy, 2), num_points)
-upoints = [u_noisy[:, i] for i in selected_indices]
-timepoints = [time[i] for i in selected_indices]
-temp=hcat(upoints...)
-dataset = [hcat(temp[i, :], timepoints) for i in 1:2]
-
-# plot(time,u[1,:])
-# plot!(time,u[2,:])
-
-discretization = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
-    dataset = [dataset, nothing])
-
-@named pde_system = PDESystem(eqs,
-    bcs,
-    domains,
-    [t],
-    [x(t), y(t)],
-    [α, β, γ, δ],
-    defaults = Dict([α =>2, β => 3, γ =>3, δ =>2]))
-
-# creating dictionary for masking equations
-eqs = pde_system.eqs
-Dict_differentials = Dict()
-exps = toexpr.(eqs)
-nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-sol3 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], progress = true)
-
-# time
-# dataset
-# chainl[1](time', sol3.estimated_nn_params[1], st)[1][1,:]
-# plot!(time1, chainl[1](time1', sol3.estimated_nn_params[1], st)[1][1,:])
-# plot!(time1, chainl[2](time1', sol3.estimated_nn_params[2], st)[1][1,:])
-# plot!(time1, chainl[1](time1', sol5.estimated_nn_params[1], st)[1][1,:])
-# plot!(time1, chainl[2](time1', sol5.estimated_nn_params[2], st)[1][1,:])
-# time1 = collect(0.0:(1 / 100.0):8.0)
-
-sol4 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], progress = true
-)
-
-
-sol5_00 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.15, 0.15],
-    phystd = [0.15, 0.15], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol5_0 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.05, 0.05],
-    phystd = [0.05, 0.05], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.1, 0.1],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol5 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.2, 0.2],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-# 100 points(sol5_2 vs sol3)
-sol5_2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.1, 0.1],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-# 100 points(sol5_2 vs sol3)
-sol5_2_1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.08, 0.08],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-# 100 points(sol5_2 vs sol3)
-sol5_2_2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.2, 0.2],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-# 50 datapoint 0-5 sol5 vs sol4
-# julia> sol4.estimated_de_params
-# 4-element Vector{Particles{Float64, 234}}:
-#  0.549 ± 0.0058
-#  0.71 ± 0.0042
-#  0.408 ± 0.0063
-#  0.355 ± 0.0015
-
-# julia> sol5.estimated_de_params
-# 4-element Vector{Particles{Float64, 234}}:
-#  0.604 ± 0.0052
-#  0.702 ± 0.0034
-#  0.346 ± 0.0037
-#  0.335 ± 0.0013
-
-# 100 datapoint 0-5 sol5_2 vs sol3
-# julia> sol3.estimated_de_params
-# 4-element Vector{Particles{Float64, 234}}:
-#  0.598 ± 0.0037
-#  0.711 ± 0.0027
-#  0.399 ± 0.0032
-#  0.333 ± 0.0011
-
-# julia> sol5_2.estimated_de_params
-# 4-element Vector{Particles{Float64, 234}}:
-#  0.604 ± 0.0035
-#  0.686 ± 0.0026
-#  0.395 ± 0.0029
-#  0.328 ± 0.00095
-
-# timespan for full dataset (0-8)
-sol6 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1, 2),
-        Normal(1, 1),
-        Normal(1, 2),
-        Normal(1, 1)
-    ], progress = true)
-
-sol5_3 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1, 2),
-        Normal(1, 1),
-        Normal(1, 2),
-        Normal(1, 1)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol5_4 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.2, 0.2],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1, 2),
-        Normal(1, 1),
-        Normal(1, 2),
-        Normal(1, 1)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol5_5 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1, 2),
-        Normal(1, 1),
-        Normal(1, 2),
-        Normal(1, 1)
-    ], progress = true
-)
-
-sol7 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1, 2),
-        Normal(1, 1),
-        Normal(1, 2),
-        Normal(1, 1)
-    ], Dict_differentials = Dict_differentials, progress = true)
-
-sol5_5_1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], progress = true
-)
-
-sol7_1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true)
-    
-sol7_2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.1, 0.1],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true)
-
-sol7_3 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.2, 0.2],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true)
- 
-sol7_4 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true)
-
-lpfun = function f(chain::Chains) # function to compute the logpdf values
-    niter, nparams, nchains = size(chain)
-    lp = zeros(niter + nchains) # resulting logpdf values
-    for i = 1:nparams
-        lp += logpdf(MvNormal(Array(chain[:,i,:])) , dataset[1][:,1]')
-        lp += logpdf(MvNormal(Array(chain[:,i,:])) , dataset[1][:,2]')
-    end
-    return lp
-end
-
-DIC, pD = dic(sol3.original.mcmc_chain, lpfun)
-DIC1, pD1 = dic(sol4.original.mcmc_chain, lpfun)
-
-size(sol3.original.mcmc_chain)
-Array(sol3.original.mcmc_chain[1,:,:])
-length(sol3.estimated_nn_params[1])
-chainl[1](time', sol3.estimated_nn_params[1], st)[1]
-
-data = [hcat(calculate_derivatives2(dataset[i][:, 2], dataset[1][:, 1]),dataset[i][:, 2]) for i in eachindex(dataset)]
-dataset[1][:,1]
-dataset[2]
-plot!(dataset[1][:,2],dataset[1][:,1])
-eqs
-sol5 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 200,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.02, 0.02],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(3, 2),
-        Normal(3, 2)
-        # LogNormal(1, 2),
-        # LogNormal(1, 2),
-        # LogNormal(1, 2),
-        # LogNormal(1, 2)
-    ], progress = true)
-
-# plot(time, chainl[1](time', sol2.estimated_nn_params[1], st)[1])
-# plot!(time, chainl[2](time', sol2.estimated_nn_params[2], st)[1])
-
-sol6 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 200,
-    bcstd = [0.5, 0.5],
-    phystd = [0.5, 0.5], l2std = [0.02, 0.02],
-    priorsNNw = (0.0, 5.0), phystdnew = [0.5, 0.5],
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),aa
-    param = [
-        # LogNormal(2, 3),LogNormal(2, 3),LogNormal(2, 3),LogNormal(2, 3)
-        # Normal(3, 2),
-        # Normal(4, 2),
-        Normal(3, 2),
-        Normal(3, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-function calculate_derivatives2(indvar,depvar)
-    x̂, time = indvar,depvar
-    num_points = length(x̂)
-    # Initialize an array to store the derivative values.
-    derivatives = similar(x̂)
-
-    for i in 2:(num_points - 1)
-        # Calculate the first-order derivative using central differences.
-        Δt_forward = time[i + 1] - time[i]
-        Δt_backward = time[i] - time[i - 1]
-
-        derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
-
-        derivatives[i] = derivative
-    end
-
-    # Derivatives at the endpoints can be calculated using forward or backward differences.
-    derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
-    derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
-    return derivatives
-end
-dataset[1]
-dataset[2]
-dataset[1][:,1]=calculate_derivatives2(dataset[1][:,2], dataset[1][:,1])
-dataset[2][:,1]=calculate_derivatives2(dataset[2][:,2], dataset[2][:,1])
-dataset[1]
-dataset[2]
-sol7 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 200,
-    bcstd = [0.5, 0.5],
-    phystd = [0.5, 0.5], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(0, 2),
-        Normal(0, 2)
-        # LogNormal(1, 2),
-        # LogNormal(1, 2),
-        # LogNormal(1, 2),
-        # LogNormal(1, 2)
-    ], progress = true)
-
-# plot(time, chainl[1](time', sol2.estimated_nn_params[1], st)[1])
-# plot!(time, chainl[2](time', sol2.estimated_nn_params[2], st)[1])
-
-sol8 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), phystdnew = [0.1, 0.1],
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),aa
-    param = [
-        # LogNormal(2, 3),LogNormal(2, 3),LogNormal(2, 3),LogNormal(2, 3)
-        # Normal(3, 2),
-        # Normal(4, 2),
-        Normal(0, 2),
-        Normal(0, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-timepoints = collect(0.0:(1 / 100.0):9.0)
-plot!(timepoints', chainl[1](timepoints', sol5_4.estimated_nn_params[1], st)[1])
-plot!(timepoints, chainl[2](timepoints', sol5_4.estimated_nn_params[2], st)[1])
-
-using Plots, StatsPlots
-plotly()
-
-plot(time, u[1, :])
-plot!(time, u[2, :])
-scatter!(time, u_noisy[1, :])
-scatter!(time, u_noisy[2, :])
-scatter!(discretization.dataset[1][1][:,2], discretization.dataset[1][1][:,1])
-scatter!(discretization.dataset[1][2][:,2], discretization.dataset[1][2][:,1])
-
-# plot28(sol4 seems better vs sol3 plots, params seems similar)
-plot!(sol3.timepoints[1]', sol3.ensemblesol[1],legend=nothing)
-plot!(sol3.timepoints[2]', sol3.ensemblesol[2])
-plot!(sol4.timepoints[1]', sol4.ensemblesol[1])
-plot!(sol4.timepoints[2]', sol4.ensemblesol[2])
-
-plot!(sol4_2.timepoints[1]', sol4_2.ensemblesol[1],legend=nothing)
-plot!(sol4_2.timepoints[2]', sol4_2.ensemblesol[2])
-plot!(sol5_2.timepoints[1]', sol5_2.ensemblesol[1],legend=nothing)
-plot!(sol5_2.timepoints[2]', sol5_2.ensemblesol[2])
-
-plot!(sol4_3.timepoints[1]', sol4_3.ensemblesol[1],legend=nothing)
-plot!(sol4_3.timepoints[2]', sol4_3.ensemblesol[2])
-plot!(sol5_3.timepoints[1]', sol5_3.ensemblesol[1])
-plot!(sol5_3.timepoints[2]', sol5_3.ensemblesol[2])
-plot!(sol5_4.timepoints[1]', sol5_4.ensemblesol[1],legend=nothing)
-plot!(sol5_4.timepoints[2]', sol5_4.ensemblesol[2])
-
-
-# plot 36 sol4 vs sol5(params sol4 better, but plots sol5 "looks" better),plot 44(sol5 better than sol6 overall)
-plot!(sol5.timepoints[1]', sol5.ensemblesol[1],legend=nothing)
-plot!(sol5.timepoints[2]', sol5.ensemblesol[2])
-plot!(sol6.timepoints[1]', sol6.ensemblesol[1])
-plot!(sol6.timepoints[2]', sol6.ensemblesol[2])
-
-# plot52 sol7 vs sol5(sol5 overall better plots, params?)
-plot!(sol7.timepoints[1]', sol7.ensemblesol[1])
-plot!(sol7.timepoints[2]', sol7.ensemblesol[2])
-
-# sol8,sol8_2,sol9,sol9_2 bad
-plot!(sol8.timepoints[1]', sol8.ensemblesol[1])
-plot!(sol8.timepoints[2]', sol8.ensemblesol[2])
-plot!(sol8_2.timepoints[1]', sol8_2.ensemblesol[1])
-plot!(sol8_2.timepoints[2]', sol8_2.ensemblesol[2])
-
-plot!(sol9.timepoints[1]', sol9.ensemblesol[1])
-plot!(sol9.timepoints[2]', sol9.ensemblesol[2])
-plot!(sol9_2.timepoints[1]', sol9_2.ensemblesol[1])
-plot!(sol9_2.timepoints[2]', sol9_2.ensemblesol[2])
-
-
-plot!(sol5_5.timepoints[1]', sol5_5.ensemblesol[1])
-plot!(sol5_5.timepoints[2]', sol5_5.ensemblesol[2],legend=nothing)
-
-plot!(sol5_5_1.timepoints[1]', sol5_5_1.ensemblesol[1])
-plot!(sol5_5_1.timepoints[2]', sol5_5_1.ensemblesol[2],legend=nothing)
-plot!(sol7_1.timepoints[1]', sol7_1.ensemblesol[1])
-plot!(sol7_1.timepoints[2]', sol7_1.ensemblesol[2])
-
-plot!(sol7_4.timepoints[1]', sol7_4.ensemblesol[1])
-plot!(sol7_4.timepoints[2]', sol7_4.ensemblesol[2])
-
-plot!(sol5_2_1.timepoints[1]', sol5_2_1.ensemblesol[1],legend=nothing)
-plot!(sol5_2_1.timepoints[2]', sol5_2_1.ensemblesol[2])
-plot!(sol5_2_2.timepoints[1]', sol5_2_2.ensemblesol[1],legend=nothing)
-plot!(sol5_2_2.timepoints[2]', sol5_2_2.ensemblesol[2])
-
-plot!(sol5_0.timepoints[1]', sol5_0.ensemblesol[1])
-plot!(sol5_0.timepoints[2]', sol5_0.ensemblesol[2],legend=nothing)
-
-plot!(sol5_00.timepoints[1]', sol5_00.ensemblesol[1])
-plot!(sol5_00.timepoints[2]', sol5_00.ensemblesol[2],legend=nothing)
-
-# test with lower number of points
-# test same calls 2 times or more
-# consider full range dataset case
-# combination of all above
-
-# run 1 100 iters
-sol5.estimated_de_params
-sol6.estimated_de_params
-
-# run 2 200 iters
-sol5.estimated_de_params
-sol6.estimated_de_params
-
-# run 2 200 iters
-sol3.estimated_de_params
-sol4.estimated_de_params
-
-# p = [2/3, 2/3, 1/3, 1/3]
-sol3.estimated_de_params
-sol4.estimated_de_params
-dataset[1]
-eqs
-α, β, γ, δ = p
-p
-#  1.0
-#  0.6666666666666666
-#  1.0
-#  0.33333333333333333
-
-1/a
-1/c
-eqs
-using StatsPlots
-plotly()
-plot(sol3.original.mcmc_chain)
-plot(sol4.original.mcmc_chain)
-
-# 4-element Vector{Particles{Float64, 34}}:
-#  1.23 ± 0.022
-#  0.858 ± 0.011
-#  3.04 ± 0.079
-#  1.03 ± 0.024
-# 4-element Vector{Particles{Float64, 34}}:
-#  1.2 ± 0.0069
-#  0.835 ± 0.006
-#  3.22 ± 0.01
-#  1.08 ± 0.0053
-# # plot(time', chainl[1](time', sol1.estimated_nn_params[1], st)[1])
-# # plot!(time, chainl[2](time', sol1.estimated_nn_params[2], st)[1])
-
-# sol3 = ahmc_bayesian_pinn_pde(pde_system,
-#     discretization;
-#     draw_samples = 500,
-#     bcstd = [0.05, 0.05],
-#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
-#     phystdnew = [0.5, 0.5],
-#     #  Kernel = AdvancedHMC.NUTS(0.8),
-#     priorsNNw = (0.0, 10.0),
-#     saveats = [1 / 50.0],
-#     param = [
-#         Normal(0.0, 2),
-#         Normal(0.0, 2),
-#         Normal(0.0, 2),
-#         Normal(0.0, 2)
-#     ],
-#     Dict_differentials = Dict_differentials, progress = true)
-
-# sol = ahmc_bayesian_pinn_pde(pde_system,
-#     discretization;
-#     draw_samples = 500,
-#     bcstd = [0.05, 0.05],
-#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
-#     priorsNNw = (0.0, 10.0),
-#     saveats = [1 / 50.0],
-#     # Kernel = AdvancedHMC.NUTS(0.8),
-#     param = [
-#         Normal(1.0, 2),
-#         Normal(1.0, 2),
-#         Normal(1.0, 2),
-#         Normal(1.0, 2)
-#     ], progress = true)
-
-# plot!(sol.timepoints[1]', sol.ensemblesol[1])
-# plot!(sol.timepoints[2]', sol.ensemblesol[2])
-
-# sol1 = ahmc_bayesian_pinn_pde(pde_system,
-#     discretization;
-#     draw_samples = 500,
-#     bcstd = [0.05, 0.05],
-#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
-#     phystdnew = [0.5, 0.5],
-#     #  Kernel = AdvancedHMC.NUTS(0.8),
-#     priorsNNw = (0.0, 10.0),
-#     saveats = [1 / 50.0],
-#     param = [
-#         Normal(1.0, 2),
-#         Normal(1.0, 2),
-#         Normal(1.0, 2),
-#         Normal(1.0, 2)
-#     ],
-#     Dict_differentials = Dict_differentials, progress = true)
-
-# plot!(sol1.timepoints[1]', sol1.ensemblesol[1])
-# plot!(sol1.timepoints[2]', sol1.ensemblesol[2])
-
-sol = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 500,
-    bcstd = [0.05, 0.05],
-    phystd = [0.005, 0.005], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 10.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(1.0, 2),
-        Normal(1.0, 2),
-        Normal(1.0, 2),
-        Normal(1.0, 2)
-    ])
-
-# plot!(sol.timepoints[1]', sol.ensemblesol[1])
-# plot!(sol.timepoints[2]', sol.ensemblesol[2])
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 500,
-    bcstd = [0.05, 0.05],
-    phystd = [0.005, 0.005], l2std = [0.1, 0.1],
-    phystdnew = [0.5, 0.5],
-    #  Kernel = AdvancedHMC.NUTS(0.8),
-    priorsNNw = (0.0, 10.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1.0, 2),
-        Normal(1.0, 2),
-        Normal(1.0, 2),
-        Normal(1.0, 2)
-    ],
-    Dict_differentials = Dict_differentials)
-
-param = 2 * π
-ts = vec(sol1.timepoints[1])
-u_real = [analytic_sol_func1(0.0, t) for t in ts]
-u_predict = pmean(sol1.ensemblesol[1])
-
-@test u_predict≈u_real atol=1.5
-@test mean(u_predict .- u_real) < 0.1
-@test sol1.estimated_de_params[1]≈param atol=param * 0.3
-
-# points1 = []
-# for eq_arg in eq_args
-#     a = []
-#     # for each (depvar,[indvar1..]) if indvari==indvar (eq_arg)
-#     for i in eachindex(symbols_input)
-#         if symbols_input[i][2] == eq_arg
-#             # include domain points of that depvar
-#             # each loss equation take domain matrix [points..;points..]
-#             push!(a, train_sets[i][:, 2:end]')
-#         end
-#     end
-#     # vcat as new row for next equation
-#     push!(points1, vcat(a...))
-# end
-# println(points1 == points)
-
-# using NeuralPDE, Flux, Lux, ModelingToolkit, LinearAlgebra, AdvancedHMC
-# import ModelingToolkit: Interval, infimum, supremum, Distributions
-# using Plots, MonteCarloMeasurements
-
-# @parameters x, t, α
-# @variables u(..)
-# Dt = Differential(t)
-# Dx = Differential(x)
-# Dx2 = Differential(x)^2
-# Dx3 = Differential(x)^3
-# Dx4 = Differential(x)^4
-
-# # α = 1
-# β = 4
-# γ = 1
-# eq = Dt(u(x, t)) + u(x, t) * Dx(u(x, t)) + α * Dx2(u(x, t)) + β * Dx3(u(x, t)) + γ * Dx4(u(x, t)) ~ 0
-
-# u_analytic(x, t; z = -x / 2 + t) = 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
-# du(x, t; z = -x / 2 + t) = 15 / 2 * (tanh(z) + 1) * (3 * tanh(z) - 1) * sech(z)^2
-
-# bcs = [u(x, 0) ~ u_analytic(x, 0),
-#     u(-10, t) ~ u_analytic(-10, t),
-#     u(10, t) ~ u_analytic(10, t),
-#     Dx(u(-10, t)) ~ du(-10, t),
-#     Dx(u(10, t)) ~ du(10, t)]
-
-# # Space and time domains
-# domains = [x ∈ Interval(-10.0, 10.0),
-#     t ∈ Interval(0.0, 1.0)]
-
-# # Discretization
-# dx = 0.4;
-# dt = 0.2;
-
-# # Function to compute analytical solution at a specific point (x, t)
-# function u_analytic_point(x, t)
-#     z = -x / 2 + t
-#     return 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
-# end
-
-# # Function to generate the dataset matrix
-# function generate_dataset_matrix(domains, dx, dt)
-#     x_values = -10:dx:10
-#     t_values = 0.0:dt:1.0
-
-#     dataset = []
-
-#     for t in t_values
-#         for x in x_values
-#             u_value = u_analytic_point(x, t)
-#             push!(dataset, [u_value, x, t])
-#         end
-#     end
-
-#     return vcat([data' for data in dataset]...)
-# end
-
-# datasetpde = [generate_dataset_matrix(domains, dx, dt)]
-
-# # noise to dataset
-# noisydataset = deepcopy(datasetpde)
-# noisydataset[1][:, 1] = noisydataset[1][:, 1] .+
-#                         randn(size(noisydataset[1][:, 1])) .* 5 / 100 .*
-#                         noisydataset[1][:, 1]
-
-# # plot(datasetpde[1][:, 2], datasetpde[1][:, 1], title = "Dataset from Analytical Solution")
-# # plot!(noisydataset[1][:, 2], noisydataset[1][:, 1])
-
-# # Neural network
-# chain = Lux.Chain(Lux.Dense(2, 8, Lux.tanh),
-#     Lux.Dense(8, 8, Lux.tanh),
-#     Lux.Dense(8, 1))
-
-# discretization = NeuralPDE.BayesianPINN([chain],
-#     GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset, nothing])
-
-# @named pde_system = PDESystem(eq,
-#     bcs,
-#     domains,
-#     [x, t],
-#     [u(x, t)],
-#     [α],
-#     defaults = Dict([α => 0.5]))
-
-# sol1 = ahmc_bayesian_pinn_pde(pde_system,
-#     discretization;
-#     draw_samples = 100,
-#     bcstd = [0.2, 0.2, 0.2, 0.2, 0.2],
-#     phystd = [1.0], l2std = [0.05], param = [Distributions.LogNormal(0.5, 2)],
-#     priorsNNw = (0.0, 10.0),
-#     saveats = [1 / 100.0, 1 / 100.0], progress = true)
-
-# eqs = pde_system.eqs
-# Dict_differentials = Dict()
-# exps = toexpr.(eqs)
-# nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-# sol2 = ahmc_bayesian_pinn_pde(pde_system,
-#     discretization;
-#     draw_samples = 100,
-#     bcstd = [0.2, 0.2, 0.2, 0.2, 0.2],
-#     phystd = [1.0], phystdnew = [0.05], l2std = [0.05],
-#     param = [Distributions.LogNormal(0.5, 2)],
-#     priorsNNw = (0.0, 10.0),
-#     saveats = [1 / 100.0, 1 / 100.0], Dict_differentials = Dict_differentials,
-#     progress = true)
-
-# phi = discretization.phi[1]
-# xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-#           for (d, dx) in zip(domains, [dx / 10, dt])]
-# u_predict = [[first(pmean(phi([x, t], sol1.estimated_nn_params[1]))) for x in xs]
-#              for t in ts]
-# u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-# diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1.estimated_nn_params[1]))))
-#            for x in xs]
-#           for t in ts]
-
-# # p1 = plot(xs, u_predict, title = "predict")
-# # p2 = plot(xs, u_real, title = "analytic")
-# # p3 = plot(xs, diff_u, title = "error")
-# # plot(p1, p2, p3)
-
-# phi = discretization.phi[1]
-# xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-#           for (d, dx) in zip(domains, [dx / 10, dt])]
-# u_predict = [[first(pmean(phi([x, t], sol2.estimated_nn_params[1]))) for x in xs]
-#              for t in ts]
-# u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-# diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol2.estimated_nn_params[1]))))
-#            for x in xs]
-#           for t in ts]
-
-# # p1 = plot(xs, u_predict, title = "predict")
-# # p2 = plot(xs, u_real, title = "analytic")
-# # p3 = plot(xs, diff_u, title = "error")
-# # plot(p1, p2, p3)
-
-@parameters t, p
-@variables u(..)
-
-Dt = Differential(t)
-eqs = Dt(u(t)) - cos(p * t) ~ 0
-bcs = [u(0) ~ 0.0]
-domains = [t ∈ Interval(0.0, 2.0)]
-
-chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
-initl, st = Lux.setup(Random.default_rng(), chainl)
-
-@named pde_system = PDESystem(eqs,
-    bcs,
-    domains,
-    [t],
-    [u(t)],
-    [p],
-    defaults = Dict([p => 4.0]))
-
-analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
-timepoints = collect(0.0:(1 / 100.0):2.0)
-u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
-u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
-dataset = [hcat(u1, timepoints)]
-
-discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
-    dataset = [dataset, nothing])
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 1500,
-    bcstd = [0.05],
-    phystd = [0.01], l2std = [0.01],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 50.0],
-    param = [LogNormal(4.0, 2)], progress = true)
-
-param = 2 * π
-ts = vec(sol1.timepoints[1])
-u_real = [analytic_sol_func1(0.0, t) for t in ts]
-u_predict = pmean(sol1.ensemblesol[1])
-
-@test u_predict≈u_real atol=0.1
-@test mean(u_predict .- u_real) < 0.01
-@test sol1.estimated_de_params[1]≈param atol=0.1
-sol1.estimated_de_params[1]
-
-eqs = pde_system.eqs
-Dict_differentials = Dict()
-exps = toexpr.(eqs)
-nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-sol2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 1500,
-    bcstd = [0.05],
-    phystd = [0.01], l2std = [0.02], phystdnew = [0.02],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 50.0],
-    param = [LogNormal(4.0, 2)],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-param = 2 * π
-ts_2 = vec(sol2.timepoints[1])
-u_real_2 = [analytic_sol_func1(0.0, t) for t in ts]
-u_predict_2 = pmean(sol2.ensemblesol[1])
-
-@test u_predict_2≈u_real_2 atol=0.1
-@test mean(u_predict_2 .- u_real_2) < 0.01
-@test sol2.estimated_de_params[1]≈param atol=0.1
-sol2.estimated_de_params[1]
-
-plot(ts_2, u_predict_2)
-plot!(ts_2, u_real_2)
-
-@parameters t, σ_
-@variables x(..), y(..), z(..)
-Dt = Differential(t)
-eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
-    Dt(y(t)) ~ x(t) * (28.0 - z(t)) - y(t),
-    Dt(z(t)) ~ x(t) * y(t) - 8 / 3 * z(t)]
-
-bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
-domains = [t ∈ Interval(0.0, 1.0)]
-
-input_ = length(domains)
-n = 7
-chain = [
-    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-        Lux.Dense(n, 1)),
-    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-        Lux.Dense(n, 1)),
-    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-        Lux.Dense(n, 1))
-]
-
-#Generate Data
-function lorenz!(du, u, p, t)
-    du[1] = 10.0 * (u[2] - u[1])
-    du[2] = u[1] * (28.0 - u[3]) - u[2]
-    du[3] = u[1] * u[2] - (8 / 3) * u[3]
-end
-
-u0 = [1.0; 0.0; 0.0]
-tspan = (0.0, 1.0)
-prob = ODEProblem(lorenz!, u0, tspan)
-sol = solve(prob, Tsit5(), dt = 0.01, saveat = 0.05)
-ts = sol.t
-us = hcat(sol.u...)
-us = us .+ ((0.05 .* randn(size(us))) .* us)
-ts_ = hcat(sol(ts).t...)[1, :]
-dataset = [hcat(us[i, :], ts_) for i in 1:3]
-
-discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true,
-    dataset = [dataset, nothing])
-
-@named pde_system = PDESystem(eqs, bcs, domains,
-    [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]]))
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 100,
-    bcstd = [0.3, 0.3, 0.3],
-    phystd = [0.1, 0.1, 0.1],
-    l2std = [1, 1, 1],
-    priorsNNw = (0.0, 1.0),
-    saveats = [0.01],
-    param = [Normal(14.0, 2)], progress = true)
-
-idealp = 10.0
-p_ = sol1.estimated_de_params[1]
-@test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
-# @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
-
-@parameters x y
-@variables u(..)
-Dxx = Differential(x)^2
-Dyy = Differential(y)^2
-
-# 2D PDE
-eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y)
-
-# Boundary conditions
-bcs = [u(0, y) ~ 0.0, u(1, y) ~ 0.0,
-    u(x, 0) ~ 0.0, u(x, 1) ~ 0.0]
-
-# Space and time domains
-domains = [x ∈ Interval(0.0, 1.0),
-    y ∈ Interval(0.0, 1.0)]
-
-# Neural network
-dim = 2 # number of dimensions
-chain = Lux.Chain(Lux.Dense(dim, 9, Lux.σ), Lux.Dense(9, 9, Lux.σ), Lux.Dense(9, 1))
-
-# Discretization
-dx = 0.04
-discretization = BayesianPINN([chain], GridTraining(dx), dataset = [[dataset], nothing])
-
-@named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
-
-eqs = pde_system.eqs
-Dict_differentials = Dict()
-exps = toexpr.(eqs)
-nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 5,
-    bcstd = [0.01, 0.01, 0.01, 0.01],
-    phystd = [0.005],
-    priorsNNw = (0.0, 2.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-xs = sol1.timepoints[1]
-sol1.ensemblesol[1]
-analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2)
-
-dataset = hcat(u_real, xs')
-u_predict = pmean(sol1.ensemblesol[1])
-u_real = [analytic_sol_func(xs[:, i][1], xs[:, i][2]) for i in 1:length(xs[1, :])]
-@test u_predict≈u_real atol=0.8
\ No newline at end of file
+@test sol1.estimated_de_params[1]≈param atol=param * 0.3
\ No newline at end of file

From 715a5ea09840da30027e60d084645ff5ee7f0fef Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Sat, 30 Mar 2024 00:06:01 +0530
Subject: [PATCH 094/107] update BPINN_PDEinvsol_tests.jl

---
 test/BPINN_PDEinvsol_tests.jl | 73 ++++++++++++++++++-----------------
 1 file changed, 37 insertions(+), 36 deletions(-)

diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index 53450f90da..b71047cdca 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -34,42 +34,43 @@ Random.seed!(100)
     u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
     dataset = [hcat(u1, timepoints)]
 
-    # checking all training strategies
-    discretization = BayesianPINN([chainl], StochasticTraining(200), param_estim = true,
-        dataset = [dataset, nothing])
-
-    ahmc_bayesian_pinn_pde(pde_system,
-        discretization;
-        draw_samples = 1500,
-        bcstd = [0.05],
-        phystd = [0.01], l2std = [0.01],
-        priorsNNw = (0.0, 1.0),
-        saveats = [1 / 50.0],
-        param = [LogNormal(6.0, 0.5)])
-
-    discretization = BayesianPINN([chainl], QuasiRandomTraining(200), param_estim = true,
-        dataset = [dataset, nothing])
-
-    ahmc_bayesian_pinn_pde(pde_system,
-        discretization;
-        draw_samples = 1500,
-        bcstd = [0.05],
-        phystd = [0.01], l2std = [0.01],
-        priorsNNw = (0.0, 1.0),
-        saveats = [1 / 50.0],
-        param = [LogNormal(6.0, 0.5)])
-
-    discretization = BayesianPINN([chainl], QuadratureTraining(), param_estim = true,
-        dataset = [dataset, nothing])
-
-    ahmc_bayesian_pinn_pde(pde_system,
-        discretization;
-        draw_samples = 1500,
-        bcstd = [0.05],
-        phystd = [0.01], l2std = [0.01],
-        priorsNNw = (0.0, 1.0),
-        saveats = [1 / 50.0],
-        param = [LogNormal(6.0, 0.5)])
+    # TODO: correct implementations
+    # # checking all training strategies
+    # discretization = BayesianPINN([chainl], StochasticTraining(200), param_estim = true,
+    #     dataset = [dataset, nothing])
+
+    # ahmc_bayesian_pinn_pde(pde_system,
+    #     discretization;
+    #     draw_samples = 1500,
+    #     bcstd = [0.05],
+    #     phystd = [0.01], l2std = [0.01],
+    #     priorsNNw = (0.0, 1.0),
+    #     saveats = [1 / 50.0],
+    #     param = [LogNormal(6.0, 0.5)])
+
+    # discretization = BayesianPINN([chainl], QuasiRandomTraining(200), param_estim = true,
+    #     dataset = [dataset, nothing])
+
+    # ahmc_bayesian_pinn_pde(pde_system,
+    #     discretization;
+    #     draw_samples = 1500,
+    #     bcstd = [0.05],
+    #     phystd = [0.01], l2std = [0.01],
+    #     priorsNNw = (0.0, 1.0),
+    #     saveats = [1 / 50.0],
+    #     param = [LogNormal(6.0, 0.5)])
+
+    # discretization = BayesianPINN([chainl], QuadratureTraining(), param_estim = true,
+    #     dataset = [dataset, nothing])
+
+    # ahmc_bayesian_pinn_pde(pde_system,
+    #     discretization;
+    #     draw_samples = 1500,
+    #     bcstd = [0.05],
+    #     phystd = [0.01], l2std = [0.01],
+    #     priorsNNw = (0.0, 1.0),
+    #     saveats = [1 / 50.0],
+    #     param = [LogNormal(6.0, 0.5)])
 
     discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
         dataset = [dataset, nothing])

From f745ef870566f7958e783b1a87835af1d896055c Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Sun, 5 May 2024 01:50:15 +0530
Subject: [PATCH 095/107] changes from reviews

---
 src/collocated_estim.jl | 123 +---------------------------------------
 1 file changed, 2 insertions(+), 121 deletions(-)

diff --git a/src/collocated_estim.jl b/src/collocated_estim.jl
index 3902f74a27..0fe608e951 100644
--- a/src/collocated_estim.jl
+++ b/src/collocated_estim.jl
@@ -1,56 +1,14 @@
-# suggested extra loss function
+# suggested extra loss function for ODE solver case
 function L2loss2(Tar::LogTargetDensity, θ)
     f = Tar.prob.f
 
     # parameter estimation chosen or not
     if Tar.extraparams > 0
-        # deri_sol = deri_sol'
         autodiff = Tar.autodiff
-        # # Timepoints to enforce Physics
-        # dataset = Array(reduce(hcat, dataset)')
-        # t = dataset[end, :]
-        # û = dataset[1:(end - 1), :]
-
-        # ode_params = Tar.extraparams == 1 ?
-        #              θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
-        #              θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
-
-        # if length(û[:, 1]) == 1
-        #     physsol = [f(û[:, i][1],
-        #         ode_params,
-        #         t[i])
-        #                for i in 1:length(û[1, :])]
-        # else
-        #     physsol = [f(û[:, i],
-        #         ode_params,
-        #         t[i])
-        #                for i in 1:length(û[1, :])]
-        # end
-        # #form of NN output matrix output dim x n
-        # deri_physsol = reduce(hcat, physsol)
-
-        # > for perfect deriv(basically gradient matching in case of an ODEFunction)
-        # in case of PDE or general ODE we would want to reduce residue of f(du,u,p,t)
-        # if length(û[:, 1]) == 1
-        #     deri_sol = [f(û[:, i][1],
-        #         Tar.prob.p,
-        #         t[i])
-        #                 for i in 1:length(û[1, :])]
-        # else
-        #     deri_sol = [f(û[:, i],
-        #         Tar.prob.p,
-        #         t[i])
-        #                 for i in 1:length(û[1, :])]
-        # end
-        # deri_sol = reduce(hcat, deri_sol) 
-        # deri_sol = reduce(hcat, derivatives)
-
         # Timepoints to enforce Physics 
         t = Tar.dataset[end]
         u1 = Tar.dataset[2]
         û = Tar.dataset[1]
-        # Tar(t, θ[1:(length(θ) - Tar.extraparams)])'
-        #  
 
         nnsol = NNodederi(Tar, t, θ[1:(length(θ) - Tar.extraparams)], autodiff)
 
@@ -71,24 +29,7 @@ function L2loss2(Tar::LogTargetDensity, θ)
         end
         #form of NN output matrix output dim x n 
         deri_physsol = reduce(hcat, physsol)
-
-        # if length(Tar.prob.u0) == 1
-        #     nnsol = [f(û[i],
-        #         Tar.prob.p,
-        #         t[i])
-        #              for i in 1:length(û[:, 1])]
-        # else
-        #     nnsol = [f([û[i], u1[i]],
-        #         Tar.prob.p,
-        #         t[i])
-        #              for i in 1:length(û[:, 1])]
-        # end
-        # form of NN output matrix output dim x n
-        # nnsol = reduce(hcat, nnsol)
-
-        # > Instead of dataset gradients trying NN derivatives with dataset collocation 
-        # # convert to matrix as nnsol  
-
+   
         physlogprob = 0
         for i in 1:length(Tar.prob.u0)
             # can add phystd[i] for u[i] 
@@ -102,64 +43,4 @@ function L2loss2(Tar::LogTargetDensity, θ)
     else
         return 0
     end
-end
-
-# PDE(DU,U,P,T)=0
-
-# Derivated via Central Diff
-# function calculate_derivatives2(dataset)
-#     x̂, time = dataset
-#     num_points = length(x̂)
-#     # Initialize an array to store the derivative values.
-#     derivatives = similar(x̂)
-
-#     for i in 2:(num_points - 1)
-#         # Calculate the first-order derivative using central differences.
-#         Δt_forward = time[i + 1] - time[i]
-#         Δt_backward = time[i] - time[i - 1]
-
-#         derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
-
-#         derivatives[i] = derivative
-#     end
-
-#     # Derivatives at the endpoints can be calculated using forward or backward differences.
-#     derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
-#     derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
-#     return derivatives
-# end
-
-function calderivatives(prob, dataset)
-    chainflux = Flux.Chain(Flux.Dense(1, 8, tanh), Flux.Dense(8, 8, tanh),
-        Flux.Dense(8, 2)) |> Flux.f64
-    # chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64
-    function loss(x, y)
-        # sum(Flux.mse.(prob.u0[1] .+ (prob.tspan[2] .- x)' .* chainflux(x)[1, :], y[1]) +
-        #     Flux.mse.(prob.u0[2] .+ (prob.tspan[2] .- x)' .* chainflux(x)[2, :], y[2]))
-        # sum(Flux.mse.(prob.u0[1] .+ (prob.tspan[2] .- x)' .* chainflux(x)[1, :], y[1]))
-        sum(Flux.mse.(chainflux(x), y))
-    end
-    optimizer = Flux.Optimise.ADAM(0.01)
-    epochs = 3000
-    for epoch in 1:epochs
-        Flux.train!(loss,
-            Flux.params(chainflux),
-            [(dataset[end]', dataset[1:(end - 1)])],
-            optimizer)
-    end
-
-    # A1 = (prob.u0' .+
-    #   (prob.tspan[2] .- (dataset[end]' .+ sqrt(eps(eltype(Float64)))))' .*
-    #   chainflux(dataset[end]' .+ sqrt(eps(eltype(Float64))))')
-
-    # A2 = (prob.u0' .+
-    #       (prob.tspan[2] .- (dataset[end]'))' .*
-    #       chainflux(dataset[end]')')
-
-    A1 = chainflux(dataset[end]' .+ sqrt(eps(eltype(dataset[end][1]))))
-    A2 = chainflux(dataset[end]')
-
-    gradients = (A2 .- A1) ./ sqrt(eps(eltype(dataset[end][1])))
-
-    return gradients
 end
\ No newline at end of file

From dccf7c86c1931410653c8714497f30768b7515ff Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Thu, 9 May 2024 02:07:58 +0530
Subject: [PATCH 096/107] Testing code for BPINN PDEs

---
 test/BPINN_PDEinvsol_tests.jl | 2397 ++++++++++++++++++++++++++++++++-
 1 file changed, 2396 insertions(+), 1 deletion(-)

diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index b71047cdca..d66d30fbb5 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -243,4 +243,2399 @@ u_predict = pmean(sol2.ensemblesol[1])
 
 @test u_predict≈u_real atol=1.5
 @test mean(u_predict .- u_real) < 0.1
-@test sol1.estimated_de_params[1]≈param atol=param * 0.3
\ No newline at end of file
+@test sol1.estimated_de_params[1]≈param atol=param * 0.3
+
+
+println("Example 3: Lotka Volterra with New parameter estimation")
+@parameters t α β γ δ
+@variables x(..) y(..)
+
+Dt = Differential(t)
+eqs = [Dt(x(t)) * α  ~ x(t) - β * x(t) * y(t), Dt(y(t)) * δ ~ x(t) * y(t) - y(t)*γ ]
+bcs = [x(0) ~ 1.0, y(0) ~ 1.0]
+domains = [t ∈ Interval(0.0, 7.0)]
+
+# Define the parameters' values
+# α, β, γ, δ = p
+
+# regular equations
+# dx = (1.5 - y) * x # prey
+# dy = (x - 3.0) * y # predator
+# p = [1.5, 1.0, 3.0, 1.0] non transformed values
+
+# transformed equations
+# dx*0.666 = (1 - 0.666 * y) * x # prey
+# dy*1.0 = (x - 3.0) * y # predator
+# p = [0.666, 0.666, 3.0, 1.0] transformed values (change is scale also ensured!)
+
+chainl = [
+    Lux.Chain(Lux.Dense(1, 5, sin), Lux.Dense(5, 5, sin),Lux.Dense(5, 1)),
+    Lux.Chain(Lux.Dense(1, 5, sin), Lux.Dense(5, 5, sin),Lux.Dense(5, 1))
+]
+
+initl, st = Lux.setup(Random.default_rng(), chainl[1])
+initl1, st1 = Lux.setup(Random.default_rng(), chainl[2])
+
+using NeuralPDE, Lux, OrdinaryDiffEq, Distributions, Random
+
+function lotka_volterra(u, p, t)
+    # Model parameters. 
+    α, β, γ, δ = p
+    # Current state.
+    x, y = u
+
+    # Evaluate differential equations.
+    dx = (α - β * y) * x # prey
+    dy = (δ * x - γ) * y # predator
+
+    return [dx, dy]
+end
+# initial-value problem.
+u0 = [1.0, 1.0]
+# p = [2/3, 2/3, 1/3.0, 1/3.0]
+p = [1.5, 1.0, 3.0, 1.0]
+tspan = (0.0, 7.0)
+prob = ODEProblem(lotka_volterra, u0, tspan, p)
+dt = 0.01
+solution = solve(prob, Tsit5(); saveat = dt)
+solution1 = solve(prob, Tsit5(); saveat = 0.02)
+
+function calculate_errors(approx_sol, solution_points)
+  # Check vector lengths match
+  if length(approx_sol) != length(solution_points)
+    error("Vectors must have the same length")
+  end
+
+  # Calculate errors
+  n = length(approx_sol)
+  errors = randn(n)
+  for i in 1:n
+    errors[i] = solution_points[i] - approx_sol[i]
+  end
+
+  # Calculate RMSE
+  rmse = sqrt(mean(errors.^2))
+
+  # Calculate MAE
+  mae = mean(abs.(errors))
+
+  # Calculate maximum absolute error
+  max_error = maximum(abs.(errors))
+
+  # Return dictionary with errors
+  return Dict(
+      "RMSE" => rmse,
+      "MAE" => mae,
+      "Max Abs Error" => max_error,
+  )
+end
+u = hcat(solution1.u...)
+u[1,:]
+sol6_2.ensemblesol[1]
+
+a1=calculate_errors(pmean(sol6_1.ensemblesol[1]), u1[1,:])
+b1=calculate_errors(pmean(sol6_1.ensemblesol[2]), u1[2,:])
+
+a=calculate_errors(pmean(sol6_2.ensemblesol[1]), u[1,:])
+b=calculate_errors(pmean(sol6_2.ensemblesol[2]), u[2,:])
+
+c=calculate_errors(pmean(sol6_L2_2.ensemblesol[1]), u[1,:])
+d=calculate_errors(pmean(sol6_L2_2.ensemblesol[2]), u[2,:])
+
+e=calculate_errors(pmean(sol6_L2_1.ensemblesol[1]), u[1,:])
+f=calculate_errors(pmean(sol6_L2_1.ensemblesol[2]), u[2,:])
+
+g=calculate_errors(pmean(sol6_L2.ensemblesol[1]), u[1,:])
+h=calculate_errors(pmean(sol6_L2.ensemblesol[2]), u[2,:])
+sol6_2.ensemblesol[1]
+sol6_2.ensemblesol[2]
+
+sol6_L2.ensemblesol[1]
+sol6_L2.ensemblesol[2]
+
+# function moving_average_smoothing(data::Vector{T}, window_size::Int) where {T}
+#     smoothed_data = similar(data, T, length(data))
+
+#     for i in 1:length(data)
+#         start_idx = max(1, i - window_size)
+#         end_idx = min(length(data), i + window_size)
+#         smoothed_data[i] = mean(data[start_idx:end_idx])
+#     end
+
+#     return smoothed_data'
+# end
+
+# Extract solution
+time = solution.t
+u = hcat(solution.u...)
+time1=solution.t
+u_noisy = u .+ u .* (0.2 .* randn(size(u)))
+u_noisy0 = u .+ (3.0 .* rand(size(u)[1],size(u)[2]) .- 1.5)
+u_noisy1 = u .+ (0.8.* randn(size(Array(solution))))
+u_noisy2 = u .+ (0.5.* randn(size(Array(solution))))
+
+plot(time,u[1,:])
+plot!(time,u[2,:])
+scatter!(time1,u_noisy0[1,:])
+scatter!(time1,u_noisy0[2,:])
+scatter!(discretization_08_gaussian.dataset[1][1][:,2], discretization_08_gaussian.dataset[1][1][:,1])
+scatter!(discretization_08_gaussian.dataset[1][2][:,2], discretization_08_gaussian.dataset[1][2][:,1])
+
+scatter!(discretization_05_gaussian.dataset[1][1][:,2], discretization_05_gaussian.dataset[1][1][:,1])
+scatter!(discretization_05_gaussian.dataset[1][2][:,2], discretization_05_gaussian.dataset[1][2][:,1])
+# discretization_05_gaussian.dataset[1][1][:,2]
+# window_size = 5
+# smoothed_datasets = [moving_average_smoothing(u1[i, :], window_size)
+#                      for i in 1:length(solution.u[1])]
+# u2 = vcat(smoothed_datasets[1], smoothed_datasets[2])
+# Randomly select some points from the solution
+num_points = 100  # Number of points to select
+selected_indices = rand(1:size(u_noisy1, 2), num_points)
+upoints = [u_noisy1[:, i] for i in selected_indices]
+timepoints = [time[i] for i in selected_indices]
+temp=hcat(upoints...)
+dataset = [hcat(temp[i, :], timepoints) for i in 1:2]
+
+discretization_uniform = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
+    dataset = [dataset, nothing])
+discretization_08_gaussian = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
+    dataset = [dataset, nothing])
+discretization_05_gaussian = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
+    dataset = [dataset, nothing])
+
+discretization1 = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
+    dataset = [dataset, nothing])
+
+scatter!(discretization.dataset[1][1][:,2], discretization.dataset[1][1][:,1])
+scatter!(discretization.dataset[1][2][:,2], discretization.dataset[1][2][:,1])
+
+sol = solve(prob, Tsit5(); saveat=0.1)
+odedata = Array(sol) + 0.8 * randn(size(Array(sol)))
+
+@named pde_system = PDESystem(eqs,
+    bcs,
+    domains,
+    [t],
+    [x(t), y(t)],
+    [α, β, γ, δ],
+    defaults = Dict([α =>2, β => 2, γ =>2, δ =>2]))
+
+# creating dictionary for masking equations
+eqs = pde_system.eqs
+Dict_differentials = Dict()
+exps = toexpr.(eqs)
+nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+sol3 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_uniform = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_uniform;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_08_gaussian = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_08_gaussian;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_05_gaussian = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_05_gaussian;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+
+# more iterations for above
+sol3_100_uniform_1000 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_uniform;
+    draw_samples = 1000,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_08_gaussian_1000 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_08_gaussian;
+    draw_samples = 1000,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_05_gaussian_1000 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_05_gaussian;
+    draw_samples = 1000,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+    
+# more iterations for above + strict BC
+sol3_100_uniform_1000_bc = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_uniform;
+    draw_samples = 1000,
+    bcstd = [0.1, 0.1],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_08_gaussian_1000_bc = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_08_gaussian;
+    draw_samples = 1000,
+    bcstd = [0.1, 0.1],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_08_gaussian_1000_bc_hard = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_08_gaussian;
+    draw_samples = 1000,
+    bcstd = [0.05, 0.05],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_05_gaussian_1000_bc = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_05_gaussian;
+    draw_samples = 1000,
+    bcstd = [0.1, 0.1],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_08_gaussian_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_08_gaussian;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol3_100_05_gaussian_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_05_gaussian;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+# time
+# dataset
+# chainl[1](time', sol3.estimated_nn_params[1], st)[1][1,:]
+# plot!(time1, chainl[1](time1', sol3.estimated_nn_params[1], st)[1][1,:])
+# plot!(time1, chainl[2](time1', sol3.estimated_nn_params[2], st)[1][1,:])
+# plot!(time1, chainl[1](time1', sol5.estimated_nn_params[1], st)[1][1,:])
+# plot!(time1, chainl[2](time1', sol5.estimated_nn_params[2], st)[1][1,:])
+# time1 = collect(0.0:(1 / 100.0):8.0)
+
+sol4 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true
+)
+
+sol4_0 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true
+)
+
+
+sol5_00 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol5_0 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.2, 0.2],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol5 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+
+# 70 points in dataset
+sol6 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.2, 0.2],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+# SOL6_1 VS SOL6_L2
+sol6_1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol6_2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol6_2_L2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol6_3 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol6_4 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.2, 0.2],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol6_L2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+    
+sol6_L2_1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol6_L2_2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.05, 0.05],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+# 50 datapoint 0-5 sol5 vs sol4
+# julia> sol4.estimated_de_params
+# 4-element Vector{Particles{Float64, 234}}:
+#  0.549 ± 0.0058
+#  0.71 ± 0.0042
+#  0.408 ± 0.0063
+#  0.355 ± 0.0015
+
+# julia> sol5.estimated_de_params
+# 4-element Vector{Particles{Float64, 234}}:
+#  0.604 ± 0.0052
+#  0.702 ± 0.0034
+#  0.346 ± 0.0037
+#  0.335 ± 0.0013
+
+# 100 datapoint 0-5 sol5_2 vs sol3
+# julia> sol3.estimated_de_params
+# 4-element Vector{Particles{Float64, 234}}:
+#  0.598 ± 0.0037
+#  0.711 ± 0.0027
+#  0.399 ± 0.0032
+#  0.333 ± 0.0011
+
+# julia> sol5_2.estimated_de_params
+# 4-element Vector{Particles{Float64, 234}}:
+#  0.604 ± 0.0035
+#  0.686 ± 0.0026
+#  0.395 ± 0.0029
+#  0.328 ± 0.00095
+
+# timespan for full dataset (0-8)
+sol6 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1, 2),
+        Normal(1, 1),
+        Normal(1, 2),
+        Normal(1, 1)
+    ], progress = true)
+
+sol5_3 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1, 2),
+        Normal(1, 1),
+        Normal(1, 2),
+        Normal(1, 1)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol5_4 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.2, 0.2],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1, 2),
+        Normal(1, 1),
+        Normal(1, 2),
+        Normal(1, 1)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol5_5 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1, 2),
+        Normal(1, 1),
+        Normal(1, 2),
+        Normal(1, 1)
+    ], progress = true
+)
+
+sol7 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1, 2),
+        Normal(1, 1),
+        Normal(1, 2),
+        Normal(1, 1)
+    ], Dict_differentials = Dict_differentials, progress = true)
+
+sol5_5_1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], progress = true
+)
+
+sol7_1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true)
+    
+sol7_2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.1, 0.1],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true)
+
+sol7_3 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.2, 0.2],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true)
+ 
+sol7_4 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true)
+
+lpfun = function f(chain::Chains) # function to compute the logpdf values
+    niter, nparams, nchains = size(chain)
+    lp = zeros(niter + nchains) # resulting logpdf values
+    for i = 1:nparams
+        lp += logpdf(MvNormal(Array(chain[:,i,:])) , dataset[1][:,1]')
+        lp += logpdf(MvNormal(Array(chain[:,i,:])) , dataset[1][:,2]')
+    end
+    return lp
+end
+
+DIC, pD = dic(sol3.original.mcmc_chain, lpfun)
+DIC1, pD1 = dic(sol4.original.mcmc_chain, lpfun)
+
+size(sol3.original.mcmc_chain)
+Array(sol3.original.mcmc_chain[1,:,:])
+length(sol3.estimated_nn_params[1])
+chainl[1](time', sol3.estimated_nn_params[1], st)[1]
+
+data = [hcat(calculate_derivatives2(dataset[i][:, 2], dataset[1][:, 1]),dataset[i][:, 2]) for i in eachindex(dataset)]
+dataset[1][:,1]
+dataset[2]
+plot!(dataset[1][:,2],dataset[1][:,1])
+eqs
+sol5 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 200,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.02, 0.02],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(3, 2),
+        Normal(3, 2)
+        # LogNormal(1, 2),
+        # LogNormal(1, 2),
+        # LogNormal(1, 2),
+        # LogNormal(1, 2)
+    ], progress = true)
+
+# plot(time, chainl[1](time', sol2.estimated_nn_params[1], st)[1])
+# plot!(time, chainl[2](time', sol2.estimated_nn_params[2], st)[1])
+
+sol6 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 200,
+    bcstd = [0.5, 0.5],
+    phystd = [0.5, 0.5], l2std = [0.02, 0.02],
+    priorsNNw = (0.0, 5.0), phystdnew = [0.5, 0.5],
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),aa
+    param = [
+        # LogNormal(2, 3),LogNormal(2, 3),LogNormal(2, 3),LogNormal(2, 3)
+        # Normal(3, 2),
+        # Normal(4, 2),
+        Normal(3, 2),
+        Normal(3, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+function calculate_derivatives2(indvar,depvar)
+    x̂, time = indvar,depvar
+    num_points = length(x̂)
+    # Initialize an array to store the derivative values.
+    derivatives = similar(x̂)
+
+    for i in 2:(num_points - 1)
+        # Calculate the first-order derivative using central differences.
+        Δt_forward = time[i + 1] - time[i]
+        Δt_backward = time[i] - time[i - 1]
+
+        derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
+
+        derivatives[i] = derivative
+    end
+
+    # Derivatives at the endpoints can be calculated using forward or backward differences.
+    derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
+    derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
+    return derivatives
+end
+dataset[1]
+dataset[2]
+dataset[1][:,1]=calculate_derivatives2(dataset[1][:,2], dataset[1][:,1])
+dataset[2][:,1]=calculate_derivatives2(dataset[2][:,2], dataset[2][:,1])
+dataset[1]
+dataset[2]
+sol7 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 200,
+    bcstd = [0.5, 0.5],
+    phystd = [0.5, 0.5], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(0, 2),
+        Normal(0, 2)
+        # LogNormal(1, 2),
+        # LogNormal(1, 2),
+        # LogNormal(1, 2),
+        # LogNormal(1, 2)
+    ], progress = true)
+
+# plot(time, chainl[1](time', sol2.estimated_nn_params[1], st)[1])
+# plot!(time, chainl[2](time', sol2.estimated_nn_params[2], st)[1])
+
+sol8 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0), phystdnew = [0.1, 0.1],
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),aa
+    param = [
+        # LogNormal(2, 3),LogNormal(2, 3),LogNormal(2, 3),LogNormal(2, 3)
+        # Normal(3, 2),
+        # Normal(4, 2),
+        Normal(0, 2),
+        Normal(0, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+timepoints = collect(0.0:(1 / 100.0):9.0)
+plot!(timepoints', chainl[1](timepoints', sol5_4.estimated_nn_params[1], st)[1])
+plot!(timepoints, chainl[2](timepoints', sol5_4.estimated_nn_params[2], st)[1])
+
+sol_L2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true
+)
+
+sol_NEW = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+sol_L2_70 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true
+)
+
+sol_NEW_70 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0), 
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+using Plots, StatsPlots
+plotly()
+
+plot(time, u[1, :])
+plot!(time, u[2, :])
+scatter!(time, u_noisy[1, :])
+scatter!(time, u_noisy[2, :])
+scatter!(discretization.dataset[1][1][:,2], discretization.dataset[1][1][:,1])
+scatter!(discretization.dataset[1][2][:,2], discretization.dataset[1][2][:,1])
+
+scatter!(discretization1.dataset[1][1][:,2], discretization1.dataset[1][1][:,1],legend=nothing)
+scatter!(discretization1.dataset[1][2][:,2], discretization1.dataset[1][2][:,1])
+
+# plot28(sol4 seems better vs sol3 plots, params seems similar)
+plot!(sol3.timepoints[1]', sol3.ensemblesol[1])
+plot!(sol3.timepoints[2]', sol3.ensemblesol[2])
+plot!(sol3_0.timepoints[1]', sol3_0.ensemblesol[1])
+plot!(sol3_0.timepoints[2]', sol3_0.ensemblesol[2])
+
+plot!(sol4.timepoints[1]', sol4.ensemblesol[1])
+plot!(sol4.timepoints[2]', sol4.ensemblesol[2])
+plot!(sol4_0.timepoints[1]', sol4_0.ensemblesol[1])
+plot!(sol4_0.timepoints[2]', sol4_0.ensemblesol[2])
+
+plot!(sol4_2.timepoints[1]', sol4_2.ensemblesol[1],legend=nothing)
+plot!(sol4_2.timepoints[2]', sol4_2.ensemblesol[2])
+plot!(sol5_2.timepoints[1]', sol5_2.ensemblesol[1],legend=nothing)
+plot!(sol5_2.timepoints[2]', sol5_2.ensemblesol[2])
+
+plot!(sol4_3.timepoints[1]', sol4_3.ensemblesol[1],legend=nothing)
+plot!(sol4_3.timepoints[2]', sol4_3.ensemblesol[2])
+plot!(sol5_3.timepoints[1]', sol5_3.ensemblesol[1])
+plot!(sol5_3.timepoints[2]', sol5_3.ensemblesol[2])
+plot!(sol5_4.timepoints[1]', sol5_4.ensemblesol[1],legend=nothing)
+plot!(sol5_4.timepoints[2]', sol5_4.ensemblesol[2])
+
+
+# plot 36 sol4 vs sol5(params sol4 better, but plots sol5 "looks" better),plot 44(sol5 better than sol6 overall)
+plot!(sol5.timepoints[1]', sol5.ensemblesol[1],legend=nothing)
+plot!(sol5.timepoints[2]', sol5.ensemblesol[2])
+
+plot!(sol5_0.timepoints[1]', sol5_0.ensemblesol[1],legend=nothing)
+plot!(sol5_0.timepoints[2]', sol5_0.ensemblesol[2])
+
+plot!(sol5_00.timepoints[1]', sol5_00.ensemblesol[1],legend=nothing)
+plot!(sol5_00.timepoints[2]', sol5_00.ensemblesol[2])
+
+plot!(sol6.timepoints[1]', sol6.ensemblesol[1])
+plot!(sol6.timepoints[2]', sol6.ensemblesol[2])
+plot!(sol6_L2.timepoints[1]', sol6_L2.ensemblesol[1])
+plot!(sol6_L2.timepoints[2]', sol6_L2.ensemblesol[2])
+
+plot!(sol6_L2_1.timepoints[1]', sol6_L2_1.ensemblesol[1])
+plot!(sol6_L2_1.timepoints[2]', sol6_L2_1.ensemblesol[2])
+
+plot!(sol6_L2_2.timepoints[1]', sol6_L2_2.ensemblesol[1])
+plot!(sol6_L2_2.timepoints[2]', sol6_L2_2.ensemblesol[2])
+
+plot!(sol6_1.timepoints[1]', sol6_1.ensemblesol[1])
+plot!(sol6_1.timepoints[2]', sol6_1.ensemblesol[2])
+plot!(sol6_2.timepoints[1]', sol6_2.ensemblesol[1])
+plot!(sol6_2.timepoints[2]', sol6_2.ensemblesol[2],legend=nothing)
+plot!(sol6_2_L2.timepoints[1]', sol6_2_L2.ensemblesol[1])
+plot!(sol6_2_L2.timepoints[2]', sol6_2_L2.ensemblesol[2],legend=nothing)
+
+# plot52 sol7 vs sol5(sol5 overall better plots, params?)
+plot!(sol7.timepoints[1]', sol7.ensemblesol[1])
+plot!(sol7.timepoints[2]', sol7.ensemblesol[2])
+
+# sol8,sol8_2,sol9,sol9_2 bad
+plot!(sol8.timepoints[1]', sol8.ensemblesol[1])
+plot!(sol8.timepoints[2]', sol8.ensemblesol[2])
+plot!(sol8_2.timepoints[1]', sol8_2.ensemblesol[1])
+plot!(sol8_2.timepoints[2]', sol8_2.ensemblesol[2])
+
+plot!(sol9.timepoints[1]', sol9.ensemblesol[1])
+plot!(sol9.timepoints[2]', sol9.ensemblesol[2])
+plot!(sol9_2.timepoints[1]', sol9_2.ensemblesol[1])
+plot!(sol9_2.timepoints[2]', sol9_2.ensemblesol[2])
+
+
+plot!(sol5_5.timepoints[1]', sol5_5.ensemblesol[1])
+plot!(sol5_5.timepoints[2]', sol5_5.ensemblesol[2],legend=nothing)
+
+plot!(sol5_5_1.timepoints[1]', sol5_5_1.ensemblesol[1])
+plot!(sol5_5_1.timepoints[2]', sol5_5_1.ensemblesol[2],legend=nothing)
+plot!(sol7_1.timepoints[1]', sol7_1.ensemblesol[1])
+plot!(sol7_1.timepoints[2]', sol7_1.ensemblesol[2])
+
+plot!(sol7_4.timepoints[1]', sol7_4.ensemblesol[1])
+plot!(sol7_4.timepoints[2]', sol7_4.ensemblesol[2])
+
+plot!(sol5_2_1.timepoints[1]', sol5_2_1.ensemblesol[1],legend=nothing)
+plot!(sol5_2_1.timepoints[2]', sol5_2_1.ensemblesol[2])
+plot!(sol5_2_2.timepoints[1]', sol5_2_2.ensemblesol[1],legend=nothing)
+plot!(sol5_2_2.timepoints[2]', sol5_2_2.ensemblesol[2])
+
+plot!(sol5_0.timepoints[1]', sol5_0.ensemblesol[1])
+plot!(sol5_0.timepoints[2]', sol5_0.ensemblesol[2],legend=nothing)
+
+plot!(sol5_00.timepoints[1]', sol5_00.ensemblesol[1],legend=nothing)
+plot!(sol5_00.timepoints[2]', sol5_00.ensemblesol[2])
+
+plot!(sol3_0.timepoints[1]', sol3_0.ensemblesol[1])
+plot!(sol3_0.timepoints[2]', sol3_0.ensemblesol[2],legend=nothing)
+plot!(sol4_0.timepoints[1]', sol4_0.ensemblesol[1])
+plot!(sol4_0.timepoints[2]', sol4_0.ensemblesol[2],legend=nothing)
+
+plot!(sol3_100_05_gaussian.timepoints[1]', sol3_100_05_gaussian.ensemblesol[1])
+plot!(sol3_100_05_gaussian.timepoints[2]', sol3_100_05_gaussian.ensemblesol[2],legend=nothing)
+
+plot!(sol3_100_05_gaussian_new.timepoints[1]', sol3_100_05_gaussian_new.ensemblesol[1])
+plot!(sol3_100_05_gaussian_new.timepoints[2]', sol3_100_05_gaussian_new.ensemblesol[2])
+
+plot!(sol3_100_08_gaussian.timepoints[1]', sol3_100_08_gaussian.ensemblesol[1])
+plot!(sol3_100_08_gaussian.timepoints[2]', sol3_100_08_gaussian.ensemblesol[2])
+
+plot!(sol3_100_08_gaussian_new.timepoints[1]', sol3_100_08_gaussian_new.ensemblesol[1])
+plot!(sol3_100_08_gaussian_new.timepoints[2]', sol3_100_08_gaussian_new.ensemblesol[2],legend=nothing)
+
+plot!(sol3_100_uniform.timepoints[1]', sol3_100_uniform.ensemblesol[1])
+plot!(sol3_100_uniform.timepoints[2]', sol3_100_uniform.ensemblesol[2])
+
+plot!(sol3_100_08_gaussian_1000.timepoints[1]', sol3_100_08_gaussian_1000.ensemblesol[1])
+plot!(sol3_100_08_gaussian_1000.timepoints[2]', sol3_100_08_gaussian_1000.ensemblesol[2])
+
+plot!(sol3_100_05_gaussian_1000.timepoints[1]', sol3_100_05_gaussian_1000.ensemblesol[1])
+plot!(sol3_100_05_gaussian_1000.timepoints[2]', sol3_100_05_gaussian_1000.ensemblesol[2])
+
+plot!(sol3_100_uniform_1000.timepoints[1]', sol3_100_uniform_1000.ensemblesol[1])
+plot!(sol3_100_uniform_1000.timepoints[2]', sol3_100_uniform_1000.ensemblesol[2])
+
+plot!(sol3_100_08_gaussian_1000_bc.timepoints[1]', sol3_100_08_gaussian_1000_bc.ensemblesol[1])
+plot!(sol3_100_08_gaussian_1000_bc.timepoints[2]', sol3_100_08_gaussian_1000_bc.ensemblesol[2])
+
+# test with lower number of points
+# test same calls 2 times or more
+# consider full range dataset case
+# combination of all above
+
+# run 1 100 iters
+sol5.estimated_de_params
+sol6.estimated_de_params
+
+# run 2 200 iters
+sol5.estimated_de_params
+sol6.estimated_de_params
+
+# run 2 200 iters
+sol3.estimated_de_params
+sol4.estimated_de_params
+
+# p = [2/3, 2/3, 1/3, 1/3]
+sol3.estimated_de_params
+sol4.estimated_de_params
+dataset[1]
+eqs
+α, β, γ, δ = p
+p
+#  1.0
+#  0.6666666666666666
+#  1.0
+#  0.33333333333333333
+
+1/a
+1/c
+eqs
+using StatsPlots
+plotly()
+plot(sol3.original.mcmc_chain)
+plot(sol5_00.original.mcmc_chain)
+
+# 4-element Vector{Particles{Float64, 34}}:
+#  1.23 ± 0.022
+#  0.858 ± 0.011
+#  3.04 ± 0.079
+#  1.03 ± 0.024
+# 4-element Vector{Particles{Float64, 34}}:
+#  1.2 ± 0.0069
+#  0.835 ± 0.006
+#  3.22 ± 0.01
+#  1.08 ± 0.0053
+# # plot(time', chainl[1](time', sol1.estimated_nn_params[1], st)[1])
+# # plot!(time, chainl[2](time', sol1.estimated_nn_params[2], st)[1])
+
+# sol3 = ahmc_bayesian_pinn_pde(pde_system,
+#     discretization;
+#     draw_samples = 500,
+#     bcstd = [0.05, 0.05],
+#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
+#     phystdnew = [0.5, 0.5],
+#     #  Kernel = AdvancedHMC.NUTS(0.8),
+#     priorsNNw = (0.0, 10.0),
+#     saveats = [1 / 50.0],
+#     param = [
+#         Normal(0.0, 2),
+#         Normal(0.0, 2),
+#         Normal(0.0, 2),
+#         Normal(0.0, 2)
+#     ],
+#     Dict_differentials = Dict_differentials, progress = true)
+
+# sol = ahmc_bayesian_pinn_pde(pde_system,
+#     discretization;
+#     draw_samples = 500,
+#     bcstd = [0.05, 0.05],
+#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
+#     priorsNNw = (0.0, 10.0),
+#     saveats = [1 / 50.0],
+#     # Kernel = AdvancedHMC.NUTS(0.8),
+#     param = [
+#         Normal(1.0, 2),
+#         Normal(1.0, 2),
+#         Normal(1.0, 2),
+#         Normal(1.0, 2)
+#     ], progress = true)
+
+# plot!(sol.timepoints[1]', sol.ensemblesol[1])
+# plot!(sol.timepoints[2]', sol.ensemblesol[2])
+
+# sol1 = ahmc_bayesian_pinn_pde(pde_system,
+#     discretization;
+#     draw_samples = 500,
+#     bcstd = [0.05, 0.05],
+#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
+#     phystdnew = [0.5, 0.5],
+#     #  Kernel = AdvancedHMC.NUTS(0.8),
+#     priorsNNw = (0.0, 10.0),
+#     saveats = [1 / 50.0],
+#     param = [
+#         Normal(1.0, 2),
+#         Normal(1.0, 2),
+#         Normal(1.0, 2),
+#         Normal(1.0, 2)
+#     ],
+#     Dict_differentials = Dict_differentials, progress = true)
+
+# plot!(sol1.timepoints[1]', sol1.ensemblesol[1])
+# plot!(sol1.timepoints[2]', sol1.ensemblesol[2])
+
+sol = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 500,
+    bcstd = [0.05, 0.05],
+    phystd = [0.005, 0.005], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 10.0),
+    saveats = [1 / 50.0],
+    # Kernel = AdvancedHMC.NUTS(0.8),
+    param = [
+        Normal(1.0, 2),
+        Normal(1.0, 2),
+        Normal(1.0, 2),
+        Normal(1.0, 2)
+    ])
+
+# plot!(sol.timepoints[1]', sol.ensemblesol[1])
+# plot!(sol.timepoints[2]', sol.ensemblesol[2])
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 500,
+    bcstd = [0.05, 0.05],
+    phystd = [0.005, 0.005], l2std = [0.1, 0.1],
+    phystdnew = [0.5, 0.5],
+    #  Kernel = AdvancedHMC.NUTS(0.8),
+    priorsNNw = (0.0, 10.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1.0, 2),
+        Normal(1.0, 2),
+        Normal(1.0, 2),
+        Normal(1.0, 2)
+    ],
+    Dict_differentials = Dict_differentials)
+
+param = 2 * π
+ts = vec(sol1.timepoints[1])
+u_real = [analytic_sol_func1(0.0, t) for t in ts]
+u_predict = pmean(sol1.ensemblesol[1])
+
+@test u_predict≈u_real atol=1.5
+@test mean(u_predict .- u_real) < 0.1
+@test sol1.estimated_de_params[1]≈param atol=param * 0.3
+
+# points1 = []
+# for eq_arg in eq_args
+#     a = []
+#     # for each (depvar,[indvar1..]) if indvari==indvar (eq_arg)
+#     for i in eachindex(symbols_input)
+#         if symbols_input[i][2] == eq_arg
+#             # include domain points of that depvar
+#             # each loss equation take domain matrix [points..;points..]
+#             push!(a, train_sets[i][:, 2:end]')
+#         end
+#     end
+#     # vcat as new row for next equation
+#     push!(points1, vcat(a...))
+# end
+# println(points1 == points)
+
+# using NeuralPDE, Flux, Lux, ModelingToolkit, LinearAlgebra, AdvancedHMC
+# import ModelingToolkit: Interval, infimum, supremum, Distributions
+# using Plots, MonteCarloMeasurements
+
+# @parameters x, t, α
+# @variables u(..)
+# Dt = Differential(t)
+# Dx = Differential(x)
+# Dx2 = Differential(x)^2
+# Dx3 = Differential(x)^3
+# Dx4 = Differential(x)^4
+
+# # α = 1
+# β = 4
+# γ = 1
+# eq = Dt(u(x, t)) + u(x, t) * Dx(u(x, t)) + α * Dx2(u(x, t)) + β * Dx3(u(x, t)) + γ * Dx4(u(x, t)) ~ 0
+
+# u_analytic(x, t; z = -x / 2 + t) = 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
+# du(x, t; z = -x / 2 + t) = 15 / 2 * (tanh(z) + 1) * (3 * tanh(z) - 1) * sech(z)^2
+
+# bcs = [u(x, 0) ~ u_analytic(x, 0),
+#     u(-10, t) ~ u_analytic(-10, t),
+#     u(10, t) ~ u_analytic(10, t),
+#     Dx(u(-10, t)) ~ du(-10, t),
+#     Dx(u(10, t)) ~ du(10, t)]
+
+# # Space and time domains
+# domains = [x ∈ Interval(-10.0, 10.0),
+#     t ∈ Interval(0.0, 1.0)]
+
+# # Discretization
+# dx = 0.4;
+# dt = 0.2;
+
+# # Function to compute analytical solution at a specific point (x, t)
+# function u_analytic_point(x, t)
+#     z = -x / 2 + t
+#     return 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
+# end
+
+# # Function to generate the dataset matrix
+# function generate_dataset_matrix(domains, dx, dt)
+#     x_values = -10:dx:10
+#     t_values = 0.0:dt:1.0
+
+#     dataset = []
+
+#     for t in t_values
+#         for x in x_values
+#             u_value = u_analytic_point(x, t)
+#             push!(dataset, [u_value, x, t])
+#         end
+#     end
+
+#     return vcat([data' for data in dataset]...)
+# end
+
+# datasetpde = [generate_dataset_matrix(domains, dx, dt)]
+
+# # noise to dataset
+# noisydataset = deepcopy(datasetpde)
+# noisydataset[1][:, 1] = noisydataset[1][:, 1] .+
+#                         randn(size(noisydataset[1][:, 1])) .* 5 / 100 .*
+#                         noisydataset[1][:, 1]
+
+# # plot(datasetpde[1][:, 2], datasetpde[1][:, 1], title = "Dataset from Analytical Solution")
+# # plot!(noisydataset[1][:, 2], noisydataset[1][:, 1])
+
+# # Neural network
+# chain = Lux.Chain(Lux.Dense(2, 8, Lux.tanh),
+#     Lux.Dense(8, 8, Lux.tanh),
+#     Lux.Dense(8, 1))
+
+# discretization = NeuralPDE.BayesianPINN([chain],
+#     GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset, nothing])
+
+# @named pde_system = PDESystem(eq,
+#     bcs,
+#     domains,
+#     [x, t],
+#     [u(x, t)],
+#     [α],
+#     defaults = Dict([α => 0.5]))
+
+# sol1 = ahmc_bayesian_pinn_pde(pde_system,
+#     discretization;
+#     draw_samples = 100,
+#     bcstd = [0.2, 0.2, 0.2, 0.2, 0.2],
+#     phystd = [1.0], l2std = [0.05], param = [Distributions.LogNormal(0.5, 2)],
+#     priorsNNw = (0.0, 10.0),
+#     saveats = [1 / 100.0, 1 / 100.0], progress = true)
+
+# eqs = pde_system.eqs
+# Dict_differentials = Dict()
+# exps = toexpr.(eqs)
+# nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+# sol2 = ahmc_bayesian_pinn_pde(pde_system,
+#     discretization;
+#     draw_samples = 100,
+#     bcstd = [0.2, 0.2, 0.2, 0.2, 0.2],
+#     phystd = [1.0], phystdnew = [0.05], l2std = [0.05],
+#     param = [Distributions.LogNormal(0.5, 2)],
+#     priorsNNw = (0.0, 10.0),
+#     saveats = [1 / 100.0, 1 / 100.0], Dict_differentials = Dict_differentials,
+#     progress = true)
+
+# phi = discretization.phi[1]
+# xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+#           for (d, dx) in zip(domains, [dx / 10, dt])]
+# u_predict = [[first(pmean(phi([x, t], sol1.estimated_nn_params[1]))) for x in xs]
+#              for t in ts]
+# u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+# diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1.estimated_nn_params[1]))))
+#            for x in xs]
+#           for t in ts]
+
+# # p1 = plot(xs, u_predict, title = "predict")
+# # p2 = plot(xs, u_real, title = "analytic")
+# # p3 = plot(xs, diff_u, title = "error")
+# # plot(p1, p2, p3)
+
+# phi = discretization.phi[1]
+# xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+#           for (d, dx) in zip(domains, [dx / 10, dt])]
+# u_predict = [[first(pmean(phi([x, t], sol2.estimated_nn_params[1]))) for x in xs]
+#              for t in ts]
+# u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+# diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol2.estimated_nn_params[1]))))
+#            for x in xs]
+#           for t in ts]
+
+# # p1 = plot(xs, u_predict, title = "predict")
+# # p2 = plot(xs, u_real, title = "analytic")
+# # p3 = plot(xs, diff_u, title = "error")
+# # plot(p1, p2, p3)
+
+@parameters t, p
+@variables u(..)
+
+Dt = Differential(t)
+eqs = Dt(u(t)) - cos(p * t) ~ 0
+bcs = [u(0) ~ 0.0]
+domains = [t ∈ Interval(0.0, 2.0)]
+
+chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
+initl, st = Lux.setup(Random.default_rng(), chainl)
+
+@named pde_system = PDESystem(eqs,
+    bcs,
+    domains,
+    [t],
+    [u(t)],
+    [p],
+    defaults = Dict([p => 4.0]))
+
+analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
+timepoints = collect(0.0:(1 / 100.0):2.0)
+u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
+u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
+dataset = [hcat(u1, timepoints)]
+
+discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
+    dataset = [dataset, nothing])
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 1500,
+    bcstd = [0.05],
+    phystd = [0.01], l2std = [0.01],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 50.0],
+    param = [LogNormal(4.0, 2)], progress = true)
+
+param = 2 * π
+ts = vec(sol1.timepoints[1])
+u_real = [analytic_sol_func1(0.0, t) for t in ts]
+u_predict = pmean(sol1.ensemblesol[1])
+
+@test u_predict≈u_real atol=0.1
+@test mean(u_predict .- u_real) < 0.01
+@test sol1.estimated_de_params[1]≈param atol=0.1
+sol1.estimated_de_params[1]
+
+eqs = pde_system.eqs
+Dict_differentials = Dict()
+exps = toexpr.(eqs)
+nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+sol2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 1500,
+    bcstd = [0.05],
+    phystd = [0.01], l2std = [0.02], phystdnew = [0.02],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 50.0],
+    param = [LogNormal(4.0, 2)],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+param = 2 * π
+ts_2 = vec(sol2.timepoints[1])
+u_real_2 = [analytic_sol_func1(0.0, t) for t in ts]
+u_predict_2 = pmean(sol2.ensemblesol[1])
+
+@test u_predict_2≈u_real_2 atol=0.1
+@test mean(u_predict_2 .- u_real_2) < 0.01
+@test sol2.estimated_de_params[1]≈param atol=0.1
+sol2.estimated_de_params[1]
+
+plot(ts_2, u_predict_2)
+plot!(ts_2, u_real_2)
+
+@parameters t, σ_
+@variables x(..), y(..), z(..)
+Dt = Differential(t)
+eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
+    Dt(y(t)) ~ x(t) * (28.0 - z(t)) - y(t),
+    Dt(z(t)) ~ x(t) * y(t) - 8 / 3 * z(t)]
+
+bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
+domains = [t ∈ Interval(0.0, 1.0)]
+
+input_ = length(domains)
+n = 7
+chain = [
+    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+        Lux.Dense(n, 1)),
+    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+        Lux.Dense(n, 1)),
+    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+        Lux.Dense(n, 1))
+]
+
+#Generate Data
+function lorenz!(du, u, p, t)
+    du[1] = 10.0 * (u[2] - u[1])
+    du[2] = u[1] * (28.0 - u[3]) - u[2]
+    du[3] = u[1] * u[2] - (8 / 3) * u[3]
+end
+
+u0 = [1.0; 0.0; 0.0]
+tspan = (0.0, 1.0)
+prob = ODEProblem(lorenz!, u0, tspan)
+sol = solve(prob, Tsit5(), dt = 0.01, saveat = 0.05)
+ts = sol.t
+us = hcat(sol.u...)
+us = us .+ ((0.05 .* randn(size(us))) .* us)
+ts_ = hcat(sol(ts).t...)[1, :]
+dataset = [hcat(us[i, :], ts_) for i in 1:3]
+
+discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true,
+    dataset = [dataset, nothing])
+
+@named pde_system = PDESystem(eqs, bcs, domains,
+    [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]]))
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 100,
+    bcstd = [0.3, 0.3, 0.3],
+    phystd = [0.1, 0.1, 0.1],
+    l2std = [1, 1, 1],
+    priorsNNw = (0.0, 1.0),
+    saveats = [0.01],
+    param = [Normal(14.0, 2)], progress = true)
+
+idealp = 10.0
+p_ = sol1.estimated_de_params[1]
+@test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
+# @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
+
+@parameters x y
+@variables u(..)
+Dxx = Differential(x)^2
+Dyy = Differential(y)^2
+
+# 2D PDE
+eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y)
+
+# Boundary conditions
+bcs = [u(0, y) ~ 0.0, u(1, y) ~ 0.0,
+    u(x, 0) ~ 0.0, u(x, 1) ~ 0.0]
+
+# Space and time domains
+domains = [x ∈ Interval(0.0, 1.0),
+    y ∈ Interval(0.0, 1.0)]
+
+# Neural network
+dim = 2 # number of dimensions
+chain = Lux.Chain(Lux.Dense(dim, 9, Lux.σ), Lux.Dense(9, 9, Lux.σ), Lux.Dense(9, 1))
+
+# Discretization
+dx = 0.04
+discretization = BayesianPINN([chain], GridTraining(dx), dataset = [[dataset], nothing])
+
+@named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
+
+eqs = pde_system.eqs
+Dict_differentials = Dict()
+exps = toexpr.(eqs)
+nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 5,
+    bcstd = [0.01, 0.01, 0.01, 0.01],
+    phystd = [0.005],
+    priorsNNw = (0.0, 2.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+xs = sol1.timepoints[1]
+sol1.ensemblesol[1]
+analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2)
+
+dataset = hcat(u_real, xs')
+u_predict = pmean(sol1.ensemblesol[1])
+u_real = [analytic_sol_func(xs[:, i][1], xs[:, i][2]) for i in 1:length(xs[1, :])]
+@test u_predict≈u_real atol=0.8
+
+using NeuralPDE, Lux, ModelingToolkit, LinearAlgebra, AdvancedHMC
+import ModelingToolkit: Interval, infimum, supremum, Distributions
+using Plots, MonteCarloMeasurements
+
+@parameters x t
+@variables u(..)
+
+Dt = Differential(t)
+Dx = Differential(x)
+Dxx = Dx^2
+α = 0.05
+# Burger's equation
+eq = Dt(u(t, x)) + u(t, x) * Dx(u(t, x)) - α * Dxx(u(t, x)) ~ 0
+
+# boundary conditions
+bcs = [
+    u(0.0, x) ~ -sin(π * x),
+    u(t, -1.0) ~ 0.0,
+    u(t, 1.0) ~ 0.0
+]
+
+domains = [t ∈ Interval(0.0, 1.0), x ∈ Interval(-1.0, 1.0)]
+
+# Neural network
+chain = Lux.Chain(Dense(2, 10, Lux.σ), Dense(10, 10, Lux.σ), Dense(10, 1))
+strategy = NeuralPDE.QuadratureTraining(; abstol = 1e-6, reltol = 1e-6, batch = 200)
+
+indvars = [t, x]
+depvars = [u(t, x)]
+@named pde_system = PDESystem(eq, bcs, domains, indvars, depvars)
+
+# KS EQUATION
+using NeuralPDE, Flux, Lux, ModelingToolkit, LinearAlgebra, AdvancedHMC
+import ModelingToolkit: Interval, infimum, supremum, Distributions
+using Plots, MonteCarloMeasurements, StatsPlots
+
+@parameters x, t, α
+@variables u(..)
+Dt = Differential(t)
+Dx = Differential(x)
+Dx2 = Differential(x)^2
+Dx3 = Differential(x)^3
+Dx4 = Differential(x)^4
+
+# α = 1
+β = 4
+γ = 1
+eq = Dt(u(x, t)) + u(x, t) * Dx(u(x, t)) + α * Dx2(u(x, t)) + β * Dx3(u(x, t)) + γ * Dx4(u(x, t)) ~ 0
+
+u_analytic(x, t; z = -x / 2 + t) = 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
+du(x, t; z = -x / 2 + t) = 15 / 2 * (tanh(z) + 1) * (3 * tanh(z) - 1) * sech(z)^2
+
+bcs = [u(x, 0) ~ u_analytic(x, 0),
+    u(-10, t) ~ u_analytic(-10, t),
+    u(10, t) ~ u_analytic(10, t),
+    Dx(u(-10, t)) ~ du(-10, t),
+    Dx(u(10, t)) ~ du(10, t)]
+
+# Space and time domains
+domains = [x ∈ Interval(-10.0, 10.0),
+    t ∈ Interval(0.0, 1.0)]
+
+# Discretization
+dx = 0.4;
+dt = 0.2;
+
+# Function to compute analytical solution at a specific point (x, t)
+function u_analytic_point(x, t)
+    z = -x / 2 + t
+    return 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
+end
+
+# Function to generate the dataset matrix
+function generate_dataset_matrix(domains, dx, dt, xlim, tlim)
+    x_values = xlim[1]:dx:xlim[2]
+    t_values = tlim[1]:dt:tlim[2]
+
+    dataset = []
+
+    for t in t_values
+        for x in x_values
+            u_value = u_analytic_point(x, t)
+            push!(dataset, [u_value, x, t])
+        end
+    end
+
+    return vcat([data' for data in dataset]...)
+end
+
+#   x_values = -10:dx:10
+#     t_values = 0.0:dt:1.0
+
+#     dataset = []
+
+#     for t in t_values
+#         for x in x_values
+#             u_value = u_analytic_point(x, t)
+#             push!(dataset, [u_value, x, t])
+#         end
+#     end
+# dataset
+#    pop= vcat([data' for data in dataset]...)
+
+datasetpde = [generate_dataset_matrix(domains, dx, dt, [-10,10], [0.0,1.0])]
+
+datasetpde_new = [generate_dataset_matrix(domains, dx, dt, [-10,0], [0.0,1.0])]
+
+# noise to dataset
+noisydataset = deepcopy(datasetpde)
+noisydataset[1][:, 1] = noisydataset[1][:, 1] .+ (randn(size(noisydataset[1][:, 1])) .* 0.8)
+
+noisydataset_new = deepcopy(datasetpde_new)
+noisydataset_new[1][:, 1] = noisydataset_new[1][:, 1] .+ (randn(size(noisydataset_new[1][:, 1])) .* 0.8)
+
+# a=discretization_new.dataset[1]
+
+plotly()
+plot(datasetpde[1][:, 2], datasetpde[1][:, 1], title = "Dataset from Analytical Solution")
+# scatter!(a[1][:, 2], a[1][:, 1])
+scatter!(noisydataset[1][:, 2], noisydataset[1][:, 1])
+
+plot(datasetpde[1][:, 2],datasetpde[1][:, 3], datasetpde[1][:, 1], title = "Dataset from Analytical Solution")
+scatter!(noisydataset[1][:, 2],noisydataset[1][:, 3], noisydataset[1][:, 1])
+
+plotly()
+plot(datasetpde_new[1][:, 2], datasetpde_new[1][:, 1], title = "Dataset from Analytical Solution")
+scatter!(noisydataset_new[1][:, 2], noisydataset_new[1][:, 1])
+
+plot(datasetpde_new[1][:, 2],datasetpde_new[1][:, 3], datasetpde_new[1][:, 1], title = "Dataset from Analytical Solution")
+scatter!(noisydataset_new[1][:, 2],noisydataset_new[1][:, 3], noisydataset_new[1][:, 1])
+
+noise_std = 1.4
+original_data = datasetpde[1][:, 1]
+original_std = std(original_data)
+ratio = noise_std / original_std
+
+
+using StatsPlots
+plot(sol1.original.mcmc_chain)
+plot(sol2.original.mcmc_chain)
+
+plot(sol0_new.original.mcmc_chain)
+plot(sol2_new.original.mcmc_chain)
+
+# Neural network
+chain = Lux.Chain(Lux.Dense(2, 8, Lux.tanh),
+    Lux.Dense(8, 8, Lux.tanh),
+    Lux.Dense(8, 1))
+
+chain_more = Lux.Chain(Lux.Dense(2, 10, Lux.tanh),
+    Lux.Dense(10, 10, Lux.tanh),
+    Lux.Dense(10, 1))
+# chain = Lux.Chain(Lux.Dense(2, 8, Lux.σ),
+#     Lux.Dense(8, 8, Lux.σ),
+#     Lux.Dense(8, 1))
+
+discretization = NeuralPDE.BayesianPINN([chain],
+    GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset, nothing])
+
+discretization_more = NeuralPDE.BayesianPINN([chain_more],
+    GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset, nothing])
+
+discretization_new = NeuralPDE.BayesianPINN([chain],
+    GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset_new, nothing])
+
+
+@named pde_system = PDESystem(eq,
+    bcs,
+    domains,
+    [x, t],
+    [u(x, t)],
+    [α],
+    defaults = Dict([α => 2.0]))
+
+eqs = pde_system.eqs
+Dict_differentials = Dict()
+exps = toexpr.(eqs)
+nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+Dict_differentials
+
+plot(sol1.original.mcmc_chain)
+meanplot(sol1.original.mcmc_chain)
+autocorplot(sol1.original.mcmc_chain)
+traceplot(sol1.original.mcmc_chain)
+
+plot(sol2.original.mcmc_chain)
+meanplot(sol2.original.mcmc_chain)
+autocorplot(sol2.original.mcmc_chain)
+traceplot(sol2.original.mcmc_chain)
+
+plot(sol0_new.original.mcmc_chain)
+meanplot(sol0_new.original.mcmc_chain)
+autocorplot(sol0_new.original.mcmc_chain)
+
+plot(sol2_new.original.mcmc_chain)
+meanplot(sol2_new.original.mcmc_chain)
+autocorplot(sol2_new.original.mcmc_chain)
+
+plot(sol3_new.original.mcmc_chain)
+meanplot(sol3_new.original.mcmc_chain)
+autocorplot(sol3_new.original.mcmc_chain)
+
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 80, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.5],
+    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+sol1_more = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_more;
+    draw_samples = 80, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.5],
+    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+sol1_1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 90, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew =  [0.7],
+    phystd = [0.7], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+sol2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 80, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    progress = true)
+
+sol2_more = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_more;
+    draw_samples = 80, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    progress = true)
+
+# julia> sol2 = ahmc_bayesian_pinn_pde(pde_system,
+#            discretization;
+#            draw_samples = 85, Kernel = AdvancedHMC.NUTS(0.8),
+#            bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+#            phystd = [0.7], l2std = [0.2], param = [Distributions.Normal(2.0,
+#            priorsNNw = (0.0, 3.0),
+#            saveats = [1 / 100.0, 1 / 100.0],
+#            progress = true)
+# ┌ Info: Current Physics Log-likelihood : 
+# └   ℓπ.full_loglikelihood(setparameters(ℓπ, initial_θ), ℓπ.allstd) = -415167
+# ┌ Info: Current Prior Log-likelihood : 
+# └   priorlogpdf(ℓπ, initial_θ) = -214.1825373360679
+# ┌ Info: Current MSE against dataset Log-likelihood :
+# └   L2LossData(ℓπ, initial_θ) = -109309.44973223892
+# Sampling 100%|███████████████████████████████| Time: 0:14:50
+#   iterations:                                   85
+#   ratio_divergent_transitions:                  0.0
+#   ratio_divergent_transitions_during_adaption:  0.02
+#   n_steps:                                      127
+#   is_accept:                                    true
+#   acceptance_rate:                              0.9982795867682919
+#   log_density:                                  -3832.934953640867
+#   hamiltonian_energy:                           4145.005901868316
+#   hamiltonian_energy_error:                     -0.07863051782624098
+#   max_hamiltonian_energy_error:                 -0.16790754244266282
+#   tree_depth:                                   7
+#   numerical_error:                              false
+#   step_size:                                    0.00018186972987192408
+#   nom_step_size:                                0.00018186972987192408
+#   is_adapt:                                     false
+#   mass_matrix:                                  DiagEuclideanMetric([1.0, 1.
+# [ Info: Sampling Complete.
+# ┌ Info: Current Physics Log-likelihood :
+# └   ℓπ.full_loglikelihood(setparameters(ℓπ, samples[end]), ℓπ.allstd) = -132
+# ┌ Info: Current Prior Log-likelihood :
+# └   priorlogpdf(ℓπ, samples[end]) = -219.17544656823006
+# ┌ Info: Current MSE against dataset Log-likelihood :
+# └   L2LossData(ℓπ, samples[end]) = -3481.509412470054
+
+# julia> sol1 = ahmc_bayesian_pinn_pde(pde_system,
+#            discretization;
+#            draw_samples = 90, Kernel = AdvancedHMC.NUTS(0.8),
+#            bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.7],
+#            phystd = [0.7], l2std = [0.2], param = [Distributions.Normal(2.0,
+#            priorsNNw = (0.0, 3.0),
+#            saveats = [1 / 100.0, 1 / 100.0],
+#            Dict_differentials = Dict_differentials,
+#            progress = true)
+# ┌ Info: Current Physics Log-likelihood : 
+# └   ℓπ.full_loglikelihood(setparameters(ℓπ, initial_θ), ℓπ.allstd) = -394622
+# ┌ Info: Current Prior Log-likelihood : 
+# └   priorlogpdf(ℓπ, initial_θ) = -214.1657203956881
+# ┌ Info: Current MSE against dataset Log-likelihood :
+# └   L2LossData(ℓπ, initial_θ) = -107600.2750860966
+# ┌ Info: Current L2_LOSSY : 
+# └   ℓπ.L2_loss2(setparameters(ℓπ, initial_θ), ℓπ.allstd) = -352.339686469935
+# Sampling 100%|███████████████████████████████| Time: 0:38:42
+#   iterations:                                   90
+#   ratio_divergent_transitions:                  0.24
+#   ratio_divergent_transitions_during_adaption:  0.02
+#   n_steps:                                      34
+#   is_accept:                                    true
+#   acceptance_rate:                              0.0755469536430885
+#   log_density:                                  -6535.135018473582
+#   hamiltonian_energy:                           6681.540376258076
+#   hamiltonian_energy_error:                     -1.7097735125544204
+#   max_hamiltonian_energy_error:                 1216.239238705054
+#   tree_depth:                                   5
+#   numerical_error:                              true
+#   step_size:                                    0.0004111092751764056
+#   nom_step_size:                                0.0004111092751764056
+#   is_adapt:                                     false
+#   mass_matrix:                                  DiagEuclideanMetric([1.0, 1.
+# [ Info: Sampling Complete.
+# ┌ Info: Current Physics Log-likelihood : 
+# └   ℓπ.full_loglikelihood(setparameters(ℓπ, samples[end]), ℓπ.allstd) = -272
+# ┌ Info: Current Prior Log-likelihood : 
+# └   priorlogpdf(ℓπ, samples[end]) = -218.6535874132563
+# ┌ Info: Current MSE against dataset Log-likelihood : 
+# └   L2LossData(ℓπ, samples[end]) = -3573.449092586736
+# ┌ Info: Current L2_LOSSY : 
+# └   ℓπ.L2_loss2(setparameters(ℓπ, samples[end]), ℓπ.allstd) = -2470.35523478
+
+using MCMCChains
+println(summarize(sol1.original.mcmc_chain))
+plot(sol1.original.mcmc_chain)
+
+sol3 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 100, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+    phystd = [0.7], l2std = [0.15], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 3.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    progress = true)
+
+sol4 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 80, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+    phystd = [0.7], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    progress = true)
+
+
+phi = discretization.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol1.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+
+phi = discretization.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol1_1.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1_1.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+
+phi = discretization.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol2.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol2.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+
+phi = discretization.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol3.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol3.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+
+phi = discretization.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol4.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol4.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+sol0_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 110, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.2],
+    phystd = [0.2], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+
+julia> sol5_new = ahmc_bayesian_pinn_pde(pde_system,
+           discretization_new;
+           draw_samples = 170, Kernel = AdvancedHMC.NUTS(0.8),
+           bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+           phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
+           priorsNNw = (0.0, 1.0),
+           saveats = [1 / 100.0, 1 / 100.0],
+           progress = true)
+
+sol1_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 110, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.2],
+    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+sol1_1_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 110, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew =  [0.5],
+    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+sol1_2_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 150, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew =  [0.1],
+    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 3.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+sol1_3_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 150, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew =  [0.2],
+    phystd = [0.3], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 3.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+sol2_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 140, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+    phystd = [0.2], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    progress = true)
+
+
+sol3_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 140, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    progress = true)
+
+sol4_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 160, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    progress = true)
+
+sol5_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 170, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    progress = true)
+
+# phi = discretization.phi[1]
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol0_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol0_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol1_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol1_1_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1_1_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol1_2_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1_2_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol1_3_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1_3_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol2_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol2_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol3_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol3_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol4_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol4_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol5_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol5_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol5_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol5_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+plotly()
+p1 = plot(ts, xs, u_predict, title = "predict")
+p2 = plot(ts, xs, u_real, title = "analytic")
+p3 = plot(ts, xs, diff_u, title = "error")
+plot(p1, p2, p3)
+# julia> sol0_new = ahmc_bayesian_pinn_pde(pde_system,
+#            discretization_new;
+#            draw_samples = 110, Kernel = AdvancedHMC.NUTS(0.8),
+#            bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.2],
+#            phystd = [0.2], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+#            priorsNNw = (0.0, 1.0),
+#            saveats = [1 / 100.0, 1 / 100.0],
+#            Dict_differentials = Dict_differentials,
+#            progress = true)
+# ┌ Info: Current Physics Log-likelihood :
+# └   ℓπ.full_loglikelihood(setparameters(ℓπ, initial_θ), ℓπ.allstd) = -398314.38213382766
+# ┌ Info: Current Prior Log-likelihood :
+# └   priorlogpdf(ℓπ, initial_θ) = -104.7365701596561
+# ┌ Info: Current MSE against dataset Log-likelihood :
+# └   L2LossData(ℓπ, initial_θ) = -58553.36940699288
+# ┌ Info: Current L2_LOSSY :
+# └   ℓπ.L2_loss2(setparameters(ℓπ, initial_θ), ℓπ.allstd) = -281.85131447737575
+# Sampling 100%|███████████████████████████████| Time: 0:26:00
+#   iterations:                                   110
+#   ratio_divergent_transitions:                  0.2
+#   ratio_divergent_transitions_during_adaption:  0.03
+#   n_steps:                                      11
+#   is_accept:                                    true
+#   acceptance_rate:                              0.0024891070448310416
+#   log_density:                                  -13158.729119075539
+#   hamiltonian_energy:                           13212.763613683248
+#   hamiltonian_energy_error:                     0.0
+#   max_hamiltonian_energy_error:                 1492.7356803165876
+#   tree_depth:                                   3
+#   numerical_error:                              true
+#   step_size:                                    0.0002145156661425442
+#   nom_step_size:                                0.0002145156661425442
+#   is_adapt:                                     false
+#   mass_matrix:                                  DiagEuclideanMetric([1.0, 1.0, 1.0, 1.0, 1.0, 1 ...])
+# [ Info: Sampling Complete.
+# ┌ Info: Current Physics Log-likelihood :
+# └   ℓπ.full_loglikelihood(setparameters(ℓπ, samples[end]), ℓπ.allstd) = -908.7769621441158
+# ┌ Info: Current Prior Log-likelihood :
+# └   priorlogpdf(ℓπ, samples[end]) = -136.87645881663929
+# ┌ Info: Current MSE against dataset Log-likelihood :
+# └   L2LossData(ℓπ, samples[end]) = -1404.7102059521355
+# ┌ Info: Current L2_LOSSY :
+# └   ℓπ.L2_loss2(setparameters(ℓπ, samples[end]), ℓπ.allstd) = -10708.363203924739
+
+# julia> sol2_new = ahmc_bayesian_pinn_pde(pde_system,
+#            discretization_new;
+#            draw_samples = 140, Kernel = AdvancedHMC.NUTS(0.8),
+#            bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+#            phystd = [0.2], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+#            priorsNNw = (0.0, 1.0),
+#            saveats = [1 / 100.0, 1 / 100.0],
+#            progress = true)
+# ┌ Info: Current Physics Log-likelihood :
+# └   ℓπ.full_loglikelihood(setparameters(ℓπ, initial_θ), ℓπ.allstd) = -397526.19267355377
+# ┌ Info: Current Prior Log-likelihood :
+# └   priorlogpdf(ℓπ, initial_θ) = -105.03439044100367
+# ┌ Info: Current MSE against dataset Log-likelihood :
+# └   L2LossData(ℓπ, initial_θ) = -60957.24454333089
+# Sampling  99%|███████████████████████████████|  ETA: 0:00:10
+#   iterations:                                   140
+#   ratio_divergent_transitions:                  0.0
+#   ratio_divergent_transitions_during_adaption:  0.01
+#   n_steps:                                      1023
+#   is_accept:                                    true
+#   acceptance_rate:                              0.972620625460237
+#   log_density:                                  -1513.1769839294327
+#   hamiltonian_energy:                           8709.204139640105
+#   hamiltonian_energy_error:                     -0.4925547801958601
+#   max_hamiltonian_energy_error:                 -1.7861646674082294
+#   tree_depth:                                   10
+#   numerical_error:                              false
+#   step_size:                                    0.00011428277138492957
+#   nom_step_size:                                0.00011428277138492957
+#   is_adapt:                                     false
+#   mass_matrix:                                  DiagEuclideanMetric([1.0, 1.0, 1.0, 1.0, 1.0, 1 ...])
+# [ Info: Sampling Complete.
+# ┌ Info: Current Physics Log-likelihood :
+# └   ℓπ.full_loglikelihood(setparameters(ℓπ, samples[end]), ℓπ.allstd) = 115.103823132341
+# ┌ Info: Current Prior Log-likelihood :
+# └   priorlogpdf(ℓπ, samples[end]) = -198.39103020815858
+# ┌ Info: Current MSE against dataset Log-likelihood :
+# └   L2LossData(ℓπ, samples[end]) = -1429.7843027541815

From 2ae9b51d6d20e6acd4f25e13311f568324a906ff Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Thu, 9 May 2024 03:21:00 +0530
Subject: [PATCH 097/107] spelling corrections, cleared test space, seperated
 pr

---
 src/collocated_estim.jl        |   46 -
 test/BPINN_PDEinvsol_tests.jl  | 2397 +-------------------------------
 test/BPINN_pde_experimental.jl | 1669 ++++++++++++++++++++++
 test/bpinnexperimental.jl      |  140 --
 4 files changed, 1670 insertions(+), 2582 deletions(-)
 delete mode 100644 src/collocated_estim.jl
 create mode 100644 test/BPINN_pde_experimental.jl
 delete mode 100644 test/bpinnexperimental.jl

diff --git a/src/collocated_estim.jl b/src/collocated_estim.jl
deleted file mode 100644
index 0fe608e951..0000000000
--- a/src/collocated_estim.jl
+++ /dev/null
@@ -1,46 +0,0 @@
-# suggested extra loss function for ODE solver case
-function L2loss2(Tar::LogTargetDensity, θ)
-    f = Tar.prob.f
-
-    # parameter estimation chosen or not
-    if Tar.extraparams > 0
-        autodiff = Tar.autodiff
-        # Timepoints to enforce Physics 
-        t = Tar.dataset[end]
-        u1 = Tar.dataset[2]
-        û = Tar.dataset[1]
-
-        nnsol = NNodederi(Tar, t, θ[1:(length(θ) - Tar.extraparams)], autodiff)
-
-        ode_params = Tar.extraparams == 1 ?
-                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
-                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
-
-        if length(Tar.prob.u0) == 1
-            physsol = [f(û[i],
-                ode_params,
-                t[i])
-                       for i in 1:length(û[:, 1])]
-        else
-            physsol = [f([û[i], u1[i]],
-                ode_params,
-                t[i])
-                       for i in 1:length(û)]
-        end
-        #form of NN output matrix output dim x n 
-        deri_physsol = reduce(hcat, physsol)
-   
-        physlogprob = 0
-        for i in 1:length(Tar.prob.u0)
-            # can add phystd[i] for u[i] 
-            physlogprob += logpdf(MvNormal(deri_physsol[i, :],
-                    LinearAlgebra.Diagonal(map(abs2,
-                        (Tar.l2std[i] * 4.0) .*
-                        ones(length(nnsol[i, :]))))),
-                nnsol[i, :])
-        end
-        return physlogprob
-    else
-        return 0
-    end
-end
\ No newline at end of file
diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index d66d30fbb5..b71047cdca 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -243,2399 +243,4 @@ u_predict = pmean(sol2.ensemblesol[1])
 
 @test u_predict≈u_real atol=1.5
 @test mean(u_predict .- u_real) < 0.1
-@test sol1.estimated_de_params[1]≈param atol=param * 0.3
-
-
-println("Example 3: Lotka Volterra with New parameter estimation")
-@parameters t α β γ δ
-@variables x(..) y(..)
-
-Dt = Differential(t)
-eqs = [Dt(x(t)) * α  ~ x(t) - β * x(t) * y(t), Dt(y(t)) * δ ~ x(t) * y(t) - y(t)*γ ]
-bcs = [x(0) ~ 1.0, y(0) ~ 1.0]
-domains = [t ∈ Interval(0.0, 7.0)]
-
-# Define the parameters' values
-# α, β, γ, δ = p
-
-# regular equations
-# dx = (1.5 - y) * x # prey
-# dy = (x - 3.0) * y # predator
-# p = [1.5, 1.0, 3.0, 1.0] non transformed values
-
-# transformed equations
-# dx*0.666 = (1 - 0.666 * y) * x # prey
-# dy*1.0 = (x - 3.0) * y # predator
-# p = [0.666, 0.666, 3.0, 1.0] transformed values (change is scale also ensured!)
-
-chainl = [
-    Lux.Chain(Lux.Dense(1, 5, sin), Lux.Dense(5, 5, sin),Lux.Dense(5, 1)),
-    Lux.Chain(Lux.Dense(1, 5, sin), Lux.Dense(5, 5, sin),Lux.Dense(5, 1))
-]
-
-initl, st = Lux.setup(Random.default_rng(), chainl[1])
-initl1, st1 = Lux.setup(Random.default_rng(), chainl[2])
-
-using NeuralPDE, Lux, OrdinaryDiffEq, Distributions, Random
-
-function lotka_volterra(u, p, t)
-    # Model parameters. 
-    α, β, γ, δ = p
-    # Current state.
-    x, y = u
-
-    # Evaluate differential equations.
-    dx = (α - β * y) * x # prey
-    dy = (δ * x - γ) * y # predator
-
-    return [dx, dy]
-end
-# initial-value problem.
-u0 = [1.0, 1.0]
-# p = [2/3, 2/3, 1/3.0, 1/3.0]
-p = [1.5, 1.0, 3.0, 1.0]
-tspan = (0.0, 7.0)
-prob = ODEProblem(lotka_volterra, u0, tspan, p)
-dt = 0.01
-solution = solve(prob, Tsit5(); saveat = dt)
-solution1 = solve(prob, Tsit5(); saveat = 0.02)
-
-function calculate_errors(approx_sol, solution_points)
-  # Check vector lengths match
-  if length(approx_sol) != length(solution_points)
-    error("Vectors must have the same length")
-  end
-
-  # Calculate errors
-  n = length(approx_sol)
-  errors = randn(n)
-  for i in 1:n
-    errors[i] = solution_points[i] - approx_sol[i]
-  end
-
-  # Calculate RMSE
-  rmse = sqrt(mean(errors.^2))
-
-  # Calculate MAE
-  mae = mean(abs.(errors))
-
-  # Calculate maximum absolute error
-  max_error = maximum(abs.(errors))
-
-  # Return dictionary with errors
-  return Dict(
-      "RMSE" => rmse,
-      "MAE" => mae,
-      "Max Abs Error" => max_error,
-  )
-end
-u = hcat(solution1.u...)
-u[1,:]
-sol6_2.ensemblesol[1]
-
-a1=calculate_errors(pmean(sol6_1.ensemblesol[1]), u1[1,:])
-b1=calculate_errors(pmean(sol6_1.ensemblesol[2]), u1[2,:])
-
-a=calculate_errors(pmean(sol6_2.ensemblesol[1]), u[1,:])
-b=calculate_errors(pmean(sol6_2.ensemblesol[2]), u[2,:])
-
-c=calculate_errors(pmean(sol6_L2_2.ensemblesol[1]), u[1,:])
-d=calculate_errors(pmean(sol6_L2_2.ensemblesol[2]), u[2,:])
-
-e=calculate_errors(pmean(sol6_L2_1.ensemblesol[1]), u[1,:])
-f=calculate_errors(pmean(sol6_L2_1.ensemblesol[2]), u[2,:])
-
-g=calculate_errors(pmean(sol6_L2.ensemblesol[1]), u[1,:])
-h=calculate_errors(pmean(sol6_L2.ensemblesol[2]), u[2,:])
-sol6_2.ensemblesol[1]
-sol6_2.ensemblesol[2]
-
-sol6_L2.ensemblesol[1]
-sol6_L2.ensemblesol[2]
-
-# function moving_average_smoothing(data::Vector{T}, window_size::Int) where {T}
-#     smoothed_data = similar(data, T, length(data))
-
-#     for i in 1:length(data)
-#         start_idx = max(1, i - window_size)
-#         end_idx = min(length(data), i + window_size)
-#         smoothed_data[i] = mean(data[start_idx:end_idx])
-#     end
-
-#     return smoothed_data'
-# end
-
-# Extract solution
-time = solution.t
-u = hcat(solution.u...)
-time1=solution.t
-u_noisy = u .+ u .* (0.2 .* randn(size(u)))
-u_noisy0 = u .+ (3.0 .* rand(size(u)[1],size(u)[2]) .- 1.5)
-u_noisy1 = u .+ (0.8.* randn(size(Array(solution))))
-u_noisy2 = u .+ (0.5.* randn(size(Array(solution))))
-
-plot(time,u[1,:])
-plot!(time,u[2,:])
-scatter!(time1,u_noisy0[1,:])
-scatter!(time1,u_noisy0[2,:])
-scatter!(discretization_08_gaussian.dataset[1][1][:,2], discretization_08_gaussian.dataset[1][1][:,1])
-scatter!(discretization_08_gaussian.dataset[1][2][:,2], discretization_08_gaussian.dataset[1][2][:,1])
-
-scatter!(discretization_05_gaussian.dataset[1][1][:,2], discretization_05_gaussian.dataset[1][1][:,1])
-scatter!(discretization_05_gaussian.dataset[1][2][:,2], discretization_05_gaussian.dataset[1][2][:,1])
-# discretization_05_gaussian.dataset[1][1][:,2]
-# window_size = 5
-# smoothed_datasets = [moving_average_smoothing(u1[i, :], window_size)
-#                      for i in 1:length(solution.u[1])]
-# u2 = vcat(smoothed_datasets[1], smoothed_datasets[2])
-# Randomly select some points from the solution
-num_points = 100  # Number of points to select
-selected_indices = rand(1:size(u_noisy1, 2), num_points)
-upoints = [u_noisy1[:, i] for i in selected_indices]
-timepoints = [time[i] for i in selected_indices]
-temp=hcat(upoints...)
-dataset = [hcat(temp[i, :], timepoints) for i in 1:2]
-
-discretization_uniform = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
-    dataset = [dataset, nothing])
-discretization_08_gaussian = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
-    dataset = [dataset, nothing])
-discretization_05_gaussian = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
-    dataset = [dataset, nothing])
-
-discretization1 = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
-    dataset = [dataset, nothing])
-
-scatter!(discretization.dataset[1][1][:,2], discretization.dataset[1][1][:,1])
-scatter!(discretization.dataset[1][2][:,2], discretization.dataset[1][2][:,1])
-
-sol = solve(prob, Tsit5(); saveat=0.1)
-odedata = Array(sol) + 0.8 * randn(size(Array(sol)))
-
-@named pde_system = PDESystem(eqs,
-    bcs,
-    domains,
-    [t],
-    [x(t), y(t)],
-    [α, β, γ, δ],
-    defaults = Dict([α =>2, β => 2, γ =>2, δ =>2]))
-
-# creating dictionary for masking equations
-eqs = pde_system.eqs
-Dict_differentials = Dict()
-exps = toexpr.(eqs)
-nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-sol3 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_uniform = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_uniform;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_08_gaussian = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_08_gaussian;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_05_gaussian = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_05_gaussian;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-
-# more iterations for above
-sol3_100_uniform_1000 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_uniform;
-    draw_samples = 1000,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_08_gaussian_1000 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_08_gaussian;
-    draw_samples = 1000,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_05_gaussian_1000 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_05_gaussian;
-    draw_samples = 1000,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-    
-# more iterations for above + strict BC
-sol3_100_uniform_1000_bc = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_uniform;
-    draw_samples = 1000,
-    bcstd = [0.1, 0.1],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_08_gaussian_1000_bc = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_08_gaussian;
-    draw_samples = 1000,
-    bcstd = [0.1, 0.1],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_08_gaussian_1000_bc_hard = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_08_gaussian;
-    draw_samples = 1000,
-    bcstd = [0.05, 0.05],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_05_gaussian_1000_bc = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_05_gaussian;
-    draw_samples = 1000,
-    bcstd = [0.1, 0.1],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_08_gaussian_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_08_gaussian;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol3_100_05_gaussian_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_05_gaussian;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-# time
-# dataset
-# chainl[1](time', sol3.estimated_nn_params[1], st)[1][1,:]
-# plot!(time1, chainl[1](time1', sol3.estimated_nn_params[1], st)[1][1,:])
-# plot!(time1, chainl[2](time1', sol3.estimated_nn_params[2], st)[1][1,:])
-# plot!(time1, chainl[1](time1', sol5.estimated_nn_params[1], st)[1][1,:])
-# plot!(time1, chainl[2](time1', sol5.estimated_nn_params[2], st)[1][1,:])
-# time1 = collect(0.0:(1 / 100.0):8.0)
-
-sol4 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true
-)
-
-sol4_0 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true
-)
-
-
-sol5_00 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol5_0 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.2, 0.2],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol5 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-
-# 70 points in dataset
-sol6 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.2, 0.2],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-# SOL6_1 VS SOL6_L2
-sol6_1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol6_2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol6_2_L2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol6_3 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol6_4 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.2, 0.2],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol6_L2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-    
-sol6_L2_1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol6_L2_2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.05, 0.05],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-# 50 datapoint 0-5 sol5 vs sol4
-# julia> sol4.estimated_de_params
-# 4-element Vector{Particles{Float64, 234}}:
-#  0.549 ± 0.0058
-#  0.71 ± 0.0042
-#  0.408 ± 0.0063
-#  0.355 ± 0.0015
-
-# julia> sol5.estimated_de_params
-# 4-element Vector{Particles{Float64, 234}}:
-#  0.604 ± 0.0052
-#  0.702 ± 0.0034
-#  0.346 ± 0.0037
-#  0.335 ± 0.0013
-
-# 100 datapoint 0-5 sol5_2 vs sol3
-# julia> sol3.estimated_de_params
-# 4-element Vector{Particles{Float64, 234}}:
-#  0.598 ± 0.0037
-#  0.711 ± 0.0027
-#  0.399 ± 0.0032
-#  0.333 ± 0.0011
-
-# julia> sol5_2.estimated_de_params
-# 4-element Vector{Particles{Float64, 234}}:
-#  0.604 ± 0.0035
-#  0.686 ± 0.0026
-#  0.395 ± 0.0029
-#  0.328 ± 0.00095
-
-# timespan for full dataset (0-8)
-sol6 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1, 2),
-        Normal(1, 1),
-        Normal(1, 2),
-        Normal(1, 1)
-    ], progress = true)
-
-sol5_3 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1, 2),
-        Normal(1, 1),
-        Normal(1, 2),
-        Normal(1, 1)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol5_4 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.2, 0.2],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1, 2),
-        Normal(1, 1),
-        Normal(1, 2),
-        Normal(1, 1)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol5_5 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1, 2),
-        Normal(1, 1),
-        Normal(1, 2),
-        Normal(1, 1)
-    ], progress = true
-)
-
-sol7 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1, 2),
-        Normal(1, 1),
-        Normal(1, 2),
-        Normal(1, 1)
-    ], Dict_differentials = Dict_differentials, progress = true)
-
-sol5_5_1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], progress = true
-)
-
-sol7_1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true)
-    
-sol7_2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.1, 0.1],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true)
-
-sol7_3 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.2, 0.2],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true)
- 
-sol7_4 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true)
-
-lpfun = function f(chain::Chains) # function to compute the logpdf values
-    niter, nparams, nchains = size(chain)
-    lp = zeros(niter + nchains) # resulting logpdf values
-    for i = 1:nparams
-        lp += logpdf(MvNormal(Array(chain[:,i,:])) , dataset[1][:,1]')
-        lp += logpdf(MvNormal(Array(chain[:,i,:])) , dataset[1][:,2]')
-    end
-    return lp
-end
-
-DIC, pD = dic(sol3.original.mcmc_chain, lpfun)
-DIC1, pD1 = dic(sol4.original.mcmc_chain, lpfun)
-
-size(sol3.original.mcmc_chain)
-Array(sol3.original.mcmc_chain[1,:,:])
-length(sol3.estimated_nn_params[1])
-chainl[1](time', sol3.estimated_nn_params[1], st)[1]
-
-data = [hcat(calculate_derivatives2(dataset[i][:, 2], dataset[1][:, 1]),dataset[i][:, 2]) for i in eachindex(dataset)]
-dataset[1][:,1]
-dataset[2]
-plot!(dataset[1][:,2],dataset[1][:,1])
-eqs
-sol5 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 200,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.02, 0.02],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(3, 2),
-        Normal(3, 2)
-        # LogNormal(1, 2),
-        # LogNormal(1, 2),
-        # LogNormal(1, 2),
-        # LogNormal(1, 2)
-    ], progress = true)
-
-# plot(time, chainl[1](time', sol2.estimated_nn_params[1], st)[1])
-# plot!(time, chainl[2](time', sol2.estimated_nn_params[2], st)[1])
-
-sol6 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 200,
-    bcstd = [0.5, 0.5],
-    phystd = [0.5, 0.5], l2std = [0.02, 0.02],
-    priorsNNw = (0.0, 5.0), phystdnew = [0.5, 0.5],
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),aa
-    param = [
-        # LogNormal(2, 3),LogNormal(2, 3),LogNormal(2, 3),LogNormal(2, 3)
-        # Normal(3, 2),
-        # Normal(4, 2),
-        Normal(3, 2),
-        Normal(3, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-function calculate_derivatives2(indvar,depvar)
-    x̂, time = indvar,depvar
-    num_points = length(x̂)
-    # Initialize an array to store the derivative values.
-    derivatives = similar(x̂)
-
-    for i in 2:(num_points - 1)
-        # Calculate the first-order derivative using central differences.
-        Δt_forward = time[i + 1] - time[i]
-        Δt_backward = time[i] - time[i - 1]
-
-        derivative = (x̂[i + 1] - x̂[i - 1]) / (Δt_forward + Δt_backward)
-
-        derivatives[i] = derivative
-    end
-
-    # Derivatives at the endpoints can be calculated using forward or backward differences.
-    derivatives[1] = (x̂[2] - x̂[1]) / (time[2] - time[1])
-    derivatives[end] = (x̂[end] - x̂[end - 1]) / (time[end] - time[end - 1])
-    return derivatives
-end
-dataset[1]
-dataset[2]
-dataset[1][:,1]=calculate_derivatives2(dataset[1][:,2], dataset[1][:,1])
-dataset[2][:,1]=calculate_derivatives2(dataset[2][:,2], dataset[2][:,1])
-dataset[1]
-dataset[2]
-sol7 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 200,
-    bcstd = [0.5, 0.5],
-    phystd = [0.5, 0.5], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(0, 2),
-        Normal(0, 2)
-        # LogNormal(1, 2),
-        # LogNormal(1, 2),
-        # LogNormal(1, 2),
-        # LogNormal(1, 2)
-    ], progress = true)
-
-# plot(time, chainl[1](time', sol2.estimated_nn_params[1], st)[1])
-# plot!(time, chainl[2](time', sol2.estimated_nn_params[2], st)[1])
-
-sol8 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0), phystdnew = [0.1, 0.1],
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),aa
-    param = [
-        # LogNormal(2, 3),LogNormal(2, 3),LogNormal(2, 3),LogNormal(2, 3)
-        # Normal(3, 2),
-        # Normal(4, 2),
-        Normal(0, 2),
-        Normal(0, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-timepoints = collect(0.0:(1 / 100.0):9.0)
-plot!(timepoints', chainl[1](timepoints', sol5_4.estimated_nn_params[1], st)[1])
-plot!(timepoints, chainl[2](timepoints', sol5_4.estimated_nn_params[2], st)[1])
-
-sol_L2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true
-)
-
-sol_NEW = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-sol_L2_70 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true
-)
-
-sol_NEW_70 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0), 
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-using Plots, StatsPlots
-plotly()
-
-plot(time, u[1, :])
-plot!(time, u[2, :])
-scatter!(time, u_noisy[1, :])
-scatter!(time, u_noisy[2, :])
-scatter!(discretization.dataset[1][1][:,2], discretization.dataset[1][1][:,1])
-scatter!(discretization.dataset[1][2][:,2], discretization.dataset[1][2][:,1])
-
-scatter!(discretization1.dataset[1][1][:,2], discretization1.dataset[1][1][:,1],legend=nothing)
-scatter!(discretization1.dataset[1][2][:,2], discretization1.dataset[1][2][:,1])
-
-# plot28(sol4 seems better vs sol3 plots, params seems similar)
-plot!(sol3.timepoints[1]', sol3.ensemblesol[1])
-plot!(sol3.timepoints[2]', sol3.ensemblesol[2])
-plot!(sol3_0.timepoints[1]', sol3_0.ensemblesol[1])
-plot!(sol3_0.timepoints[2]', sol3_0.ensemblesol[2])
-
-plot!(sol4.timepoints[1]', sol4.ensemblesol[1])
-plot!(sol4.timepoints[2]', sol4.ensemblesol[2])
-plot!(sol4_0.timepoints[1]', sol4_0.ensemblesol[1])
-plot!(sol4_0.timepoints[2]', sol4_0.ensemblesol[2])
-
-plot!(sol4_2.timepoints[1]', sol4_2.ensemblesol[1],legend=nothing)
-plot!(sol4_2.timepoints[2]', sol4_2.ensemblesol[2])
-plot!(sol5_2.timepoints[1]', sol5_2.ensemblesol[1],legend=nothing)
-plot!(sol5_2.timepoints[2]', sol5_2.ensemblesol[2])
-
-plot!(sol4_3.timepoints[1]', sol4_3.ensemblesol[1],legend=nothing)
-plot!(sol4_3.timepoints[2]', sol4_3.ensemblesol[2])
-plot!(sol5_3.timepoints[1]', sol5_3.ensemblesol[1])
-plot!(sol5_3.timepoints[2]', sol5_3.ensemblesol[2])
-plot!(sol5_4.timepoints[1]', sol5_4.ensemblesol[1],legend=nothing)
-plot!(sol5_4.timepoints[2]', sol5_4.ensemblesol[2])
-
-
-# plot 36 sol4 vs sol5(params sol4 better, but plots sol5 "looks" better),plot 44(sol5 better than sol6 overall)
-plot!(sol5.timepoints[1]', sol5.ensemblesol[1],legend=nothing)
-plot!(sol5.timepoints[2]', sol5.ensemblesol[2])
-
-plot!(sol5_0.timepoints[1]', sol5_0.ensemblesol[1],legend=nothing)
-plot!(sol5_0.timepoints[2]', sol5_0.ensemblesol[2])
-
-plot!(sol5_00.timepoints[1]', sol5_00.ensemblesol[1],legend=nothing)
-plot!(sol5_00.timepoints[2]', sol5_00.ensemblesol[2])
-
-plot!(sol6.timepoints[1]', sol6.ensemblesol[1])
-plot!(sol6.timepoints[2]', sol6.ensemblesol[2])
-plot!(sol6_L2.timepoints[1]', sol6_L2.ensemblesol[1])
-plot!(sol6_L2.timepoints[2]', sol6_L2.ensemblesol[2])
-
-plot!(sol6_L2_1.timepoints[1]', sol6_L2_1.ensemblesol[1])
-plot!(sol6_L2_1.timepoints[2]', sol6_L2_1.ensemblesol[2])
-
-plot!(sol6_L2_2.timepoints[1]', sol6_L2_2.ensemblesol[1])
-plot!(sol6_L2_2.timepoints[2]', sol6_L2_2.ensemblesol[2])
-
-plot!(sol6_1.timepoints[1]', sol6_1.ensemblesol[1])
-plot!(sol6_1.timepoints[2]', sol6_1.ensemblesol[2])
-plot!(sol6_2.timepoints[1]', sol6_2.ensemblesol[1])
-plot!(sol6_2.timepoints[2]', sol6_2.ensemblesol[2],legend=nothing)
-plot!(sol6_2_L2.timepoints[1]', sol6_2_L2.ensemblesol[1])
-plot!(sol6_2_L2.timepoints[2]', sol6_2_L2.ensemblesol[2],legend=nothing)
-
-# plot52 sol7 vs sol5(sol5 overall better plots, params?)
-plot!(sol7.timepoints[1]', sol7.ensemblesol[1])
-plot!(sol7.timepoints[2]', sol7.ensemblesol[2])
-
-# sol8,sol8_2,sol9,sol9_2 bad
-plot!(sol8.timepoints[1]', sol8.ensemblesol[1])
-plot!(sol8.timepoints[2]', sol8.ensemblesol[2])
-plot!(sol8_2.timepoints[1]', sol8_2.ensemblesol[1])
-plot!(sol8_2.timepoints[2]', sol8_2.ensemblesol[2])
-
-plot!(sol9.timepoints[1]', sol9.ensemblesol[1])
-plot!(sol9.timepoints[2]', sol9.ensemblesol[2])
-plot!(sol9_2.timepoints[1]', sol9_2.ensemblesol[1])
-plot!(sol9_2.timepoints[2]', sol9_2.ensemblesol[2])
-
-
-plot!(sol5_5.timepoints[1]', sol5_5.ensemblesol[1])
-plot!(sol5_5.timepoints[2]', sol5_5.ensemblesol[2],legend=nothing)
-
-plot!(sol5_5_1.timepoints[1]', sol5_5_1.ensemblesol[1])
-plot!(sol5_5_1.timepoints[2]', sol5_5_1.ensemblesol[2],legend=nothing)
-plot!(sol7_1.timepoints[1]', sol7_1.ensemblesol[1])
-plot!(sol7_1.timepoints[2]', sol7_1.ensemblesol[2])
-
-plot!(sol7_4.timepoints[1]', sol7_4.ensemblesol[1])
-plot!(sol7_4.timepoints[2]', sol7_4.ensemblesol[2])
-
-plot!(sol5_2_1.timepoints[1]', sol5_2_1.ensemblesol[1],legend=nothing)
-plot!(sol5_2_1.timepoints[2]', sol5_2_1.ensemblesol[2])
-plot!(sol5_2_2.timepoints[1]', sol5_2_2.ensemblesol[1],legend=nothing)
-plot!(sol5_2_2.timepoints[2]', sol5_2_2.ensemblesol[2])
-
-plot!(sol5_0.timepoints[1]', sol5_0.ensemblesol[1])
-plot!(sol5_0.timepoints[2]', sol5_0.ensemblesol[2],legend=nothing)
-
-plot!(sol5_00.timepoints[1]', sol5_00.ensemblesol[1],legend=nothing)
-plot!(sol5_00.timepoints[2]', sol5_00.ensemblesol[2])
-
-plot!(sol3_0.timepoints[1]', sol3_0.ensemblesol[1])
-plot!(sol3_0.timepoints[2]', sol3_0.ensemblesol[2],legend=nothing)
-plot!(sol4_0.timepoints[1]', sol4_0.ensemblesol[1])
-plot!(sol4_0.timepoints[2]', sol4_0.ensemblesol[2],legend=nothing)
-
-plot!(sol3_100_05_gaussian.timepoints[1]', sol3_100_05_gaussian.ensemblesol[1])
-plot!(sol3_100_05_gaussian.timepoints[2]', sol3_100_05_gaussian.ensemblesol[2],legend=nothing)
-
-plot!(sol3_100_05_gaussian_new.timepoints[1]', sol3_100_05_gaussian_new.ensemblesol[1])
-plot!(sol3_100_05_gaussian_new.timepoints[2]', sol3_100_05_gaussian_new.ensemblesol[2])
-
-plot!(sol3_100_08_gaussian.timepoints[1]', sol3_100_08_gaussian.ensemblesol[1])
-plot!(sol3_100_08_gaussian.timepoints[2]', sol3_100_08_gaussian.ensemblesol[2])
-
-plot!(sol3_100_08_gaussian_new.timepoints[1]', sol3_100_08_gaussian_new.ensemblesol[1])
-plot!(sol3_100_08_gaussian_new.timepoints[2]', sol3_100_08_gaussian_new.ensemblesol[2],legend=nothing)
-
-plot!(sol3_100_uniform.timepoints[1]', sol3_100_uniform.ensemblesol[1])
-plot!(sol3_100_uniform.timepoints[2]', sol3_100_uniform.ensemblesol[2])
-
-plot!(sol3_100_08_gaussian_1000.timepoints[1]', sol3_100_08_gaussian_1000.ensemblesol[1])
-plot!(sol3_100_08_gaussian_1000.timepoints[2]', sol3_100_08_gaussian_1000.ensemblesol[2])
-
-plot!(sol3_100_05_gaussian_1000.timepoints[1]', sol3_100_05_gaussian_1000.ensemblesol[1])
-plot!(sol3_100_05_gaussian_1000.timepoints[2]', sol3_100_05_gaussian_1000.ensemblesol[2])
-
-plot!(sol3_100_uniform_1000.timepoints[1]', sol3_100_uniform_1000.ensemblesol[1])
-plot!(sol3_100_uniform_1000.timepoints[2]', sol3_100_uniform_1000.ensemblesol[2])
-
-plot!(sol3_100_08_gaussian_1000_bc.timepoints[1]', sol3_100_08_gaussian_1000_bc.ensemblesol[1])
-plot!(sol3_100_08_gaussian_1000_bc.timepoints[2]', sol3_100_08_gaussian_1000_bc.ensemblesol[2])
-
-# test with lower number of points
-# test same calls 2 times or more
-# consider full range dataset case
-# combination of all above
-
-# run 1 100 iters
-sol5.estimated_de_params
-sol6.estimated_de_params
-
-# run 2 200 iters
-sol5.estimated_de_params
-sol6.estimated_de_params
-
-# run 2 200 iters
-sol3.estimated_de_params
-sol4.estimated_de_params
-
-# p = [2/3, 2/3, 1/3, 1/3]
-sol3.estimated_de_params
-sol4.estimated_de_params
-dataset[1]
-eqs
-α, β, γ, δ = p
-p
-#  1.0
-#  0.6666666666666666
-#  1.0
-#  0.33333333333333333
-
-1/a
-1/c
-eqs
-using StatsPlots
-plotly()
-plot(sol3.original.mcmc_chain)
-plot(sol5_00.original.mcmc_chain)
-
-# 4-element Vector{Particles{Float64, 34}}:
-#  1.23 ± 0.022
-#  0.858 ± 0.011
-#  3.04 ± 0.079
-#  1.03 ± 0.024
-# 4-element Vector{Particles{Float64, 34}}:
-#  1.2 ± 0.0069
-#  0.835 ± 0.006
-#  3.22 ± 0.01
-#  1.08 ± 0.0053
-# # plot(time', chainl[1](time', sol1.estimated_nn_params[1], st)[1])
-# # plot!(time, chainl[2](time', sol1.estimated_nn_params[2], st)[1])
-
-# sol3 = ahmc_bayesian_pinn_pde(pde_system,
-#     discretization;
-#     draw_samples = 500,
-#     bcstd = [0.05, 0.05],
-#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
-#     phystdnew = [0.5, 0.5],
-#     #  Kernel = AdvancedHMC.NUTS(0.8),
-#     priorsNNw = (0.0, 10.0),
-#     saveats = [1 / 50.0],
-#     param = [
-#         Normal(0.0, 2),
-#         Normal(0.0, 2),
-#         Normal(0.0, 2),
-#         Normal(0.0, 2)
-#     ],
-#     Dict_differentials = Dict_differentials, progress = true)
-
-# sol = ahmc_bayesian_pinn_pde(pde_system,
-#     discretization;
-#     draw_samples = 500,
-#     bcstd = [0.05, 0.05],
-#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
-#     priorsNNw = (0.0, 10.0),
-#     saveats = [1 / 50.0],
-#     # Kernel = AdvancedHMC.NUTS(0.8),
-#     param = [
-#         Normal(1.0, 2),
-#         Normal(1.0, 2),
-#         Normal(1.0, 2),
-#         Normal(1.0, 2)
-#     ], progress = true)
-
-# plot!(sol.timepoints[1]', sol.ensemblesol[1])
-# plot!(sol.timepoints[2]', sol.ensemblesol[2])
-
-# sol1 = ahmc_bayesian_pinn_pde(pde_system,
-#     discretization;
-#     draw_samples = 500,
-#     bcstd = [0.05, 0.05],
-#     phystd = [0.005, 0.005], l2std = [0.1, 0.1],
-#     phystdnew = [0.5, 0.5],
-#     #  Kernel = AdvancedHMC.NUTS(0.8),
-#     priorsNNw = (0.0, 10.0),
-#     saveats = [1 / 50.0],
-#     param = [
-#         Normal(1.0, 2),
-#         Normal(1.0, 2),
-#         Normal(1.0, 2),
-#         Normal(1.0, 2)
-#     ],
-#     Dict_differentials = Dict_differentials, progress = true)
-
-# plot!(sol1.timepoints[1]', sol1.ensemblesol[1])
-# plot!(sol1.timepoints[2]', sol1.ensemblesol[2])
-
-sol = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 500,
-    bcstd = [0.05, 0.05],
-    phystd = [0.005, 0.005], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 10.0),
-    saveats = [1 / 50.0],
-    # Kernel = AdvancedHMC.NUTS(0.8),
-    param = [
-        Normal(1.0, 2),
-        Normal(1.0, 2),
-        Normal(1.0, 2),
-        Normal(1.0, 2)
-    ])
-
-# plot!(sol.timepoints[1]', sol.ensemblesol[1])
-# plot!(sol.timepoints[2]', sol.ensemblesol[2])
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 500,
-    bcstd = [0.05, 0.05],
-    phystd = [0.005, 0.005], l2std = [0.1, 0.1],
-    phystdnew = [0.5, 0.5],
-    #  Kernel = AdvancedHMC.NUTS(0.8),
-    priorsNNw = (0.0, 10.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1.0, 2),
-        Normal(1.0, 2),
-        Normal(1.0, 2),
-        Normal(1.0, 2)
-    ],
-    Dict_differentials = Dict_differentials)
-
-param = 2 * π
-ts = vec(sol1.timepoints[1])
-u_real = [analytic_sol_func1(0.0, t) for t in ts]
-u_predict = pmean(sol1.ensemblesol[1])
-
-@test u_predict≈u_real atol=1.5
-@test mean(u_predict .- u_real) < 0.1
-@test sol1.estimated_de_params[1]≈param atol=param * 0.3
-
-# points1 = []
-# for eq_arg in eq_args
-#     a = []
-#     # for each (depvar,[indvar1..]) if indvari==indvar (eq_arg)
-#     for i in eachindex(symbols_input)
-#         if symbols_input[i][2] == eq_arg
-#             # include domain points of that depvar
-#             # each loss equation take domain matrix [points..;points..]
-#             push!(a, train_sets[i][:, 2:end]')
-#         end
-#     end
-#     # vcat as new row for next equation
-#     push!(points1, vcat(a...))
-# end
-# println(points1 == points)
-
-# using NeuralPDE, Flux, Lux, ModelingToolkit, LinearAlgebra, AdvancedHMC
-# import ModelingToolkit: Interval, infimum, supremum, Distributions
-# using Plots, MonteCarloMeasurements
-
-# @parameters x, t, α
-# @variables u(..)
-# Dt = Differential(t)
-# Dx = Differential(x)
-# Dx2 = Differential(x)^2
-# Dx3 = Differential(x)^3
-# Dx4 = Differential(x)^4
-
-# # α = 1
-# β = 4
-# γ = 1
-# eq = Dt(u(x, t)) + u(x, t) * Dx(u(x, t)) + α * Dx2(u(x, t)) + β * Dx3(u(x, t)) + γ * Dx4(u(x, t)) ~ 0
-
-# u_analytic(x, t; z = -x / 2 + t) = 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
-# du(x, t; z = -x / 2 + t) = 15 / 2 * (tanh(z) + 1) * (3 * tanh(z) - 1) * sech(z)^2
-
-# bcs = [u(x, 0) ~ u_analytic(x, 0),
-#     u(-10, t) ~ u_analytic(-10, t),
-#     u(10, t) ~ u_analytic(10, t),
-#     Dx(u(-10, t)) ~ du(-10, t),
-#     Dx(u(10, t)) ~ du(10, t)]
-
-# # Space and time domains
-# domains = [x ∈ Interval(-10.0, 10.0),
-#     t ∈ Interval(0.0, 1.0)]
-
-# # Discretization
-# dx = 0.4;
-# dt = 0.2;
-
-# # Function to compute analytical solution at a specific point (x, t)
-# function u_analytic_point(x, t)
-#     z = -x / 2 + t
-#     return 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
-# end
-
-# # Function to generate the dataset matrix
-# function generate_dataset_matrix(domains, dx, dt)
-#     x_values = -10:dx:10
-#     t_values = 0.0:dt:1.0
-
-#     dataset = []
-
-#     for t in t_values
-#         for x in x_values
-#             u_value = u_analytic_point(x, t)
-#             push!(dataset, [u_value, x, t])
-#         end
-#     end
-
-#     return vcat([data' for data in dataset]...)
-# end
-
-# datasetpde = [generate_dataset_matrix(domains, dx, dt)]
-
-# # noise to dataset
-# noisydataset = deepcopy(datasetpde)
-# noisydataset[1][:, 1] = noisydataset[1][:, 1] .+
-#                         randn(size(noisydataset[1][:, 1])) .* 5 / 100 .*
-#                         noisydataset[1][:, 1]
-
-# # plot(datasetpde[1][:, 2], datasetpde[1][:, 1], title = "Dataset from Analytical Solution")
-# # plot!(noisydataset[1][:, 2], noisydataset[1][:, 1])
-
-# # Neural network
-# chain = Lux.Chain(Lux.Dense(2, 8, Lux.tanh),
-#     Lux.Dense(8, 8, Lux.tanh),
-#     Lux.Dense(8, 1))
-
-# discretization = NeuralPDE.BayesianPINN([chain],
-#     GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset, nothing])
-
-# @named pde_system = PDESystem(eq,
-#     bcs,
-#     domains,
-#     [x, t],
-#     [u(x, t)],
-#     [α],
-#     defaults = Dict([α => 0.5]))
-
-# sol1 = ahmc_bayesian_pinn_pde(pde_system,
-#     discretization;
-#     draw_samples = 100,
-#     bcstd = [0.2, 0.2, 0.2, 0.2, 0.2],
-#     phystd = [1.0], l2std = [0.05], param = [Distributions.LogNormal(0.5, 2)],
-#     priorsNNw = (0.0, 10.0),
-#     saveats = [1 / 100.0, 1 / 100.0], progress = true)
-
-# eqs = pde_system.eqs
-# Dict_differentials = Dict()
-# exps = toexpr.(eqs)
-# nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-# sol2 = ahmc_bayesian_pinn_pde(pde_system,
-#     discretization;
-#     draw_samples = 100,
-#     bcstd = [0.2, 0.2, 0.2, 0.2, 0.2],
-#     phystd = [1.0], phystdnew = [0.05], l2std = [0.05],
-#     param = [Distributions.LogNormal(0.5, 2)],
-#     priorsNNw = (0.0, 10.0),
-#     saveats = [1 / 100.0, 1 / 100.0], Dict_differentials = Dict_differentials,
-#     progress = true)
-
-# phi = discretization.phi[1]
-# xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-#           for (d, dx) in zip(domains, [dx / 10, dt])]
-# u_predict = [[first(pmean(phi([x, t], sol1.estimated_nn_params[1]))) for x in xs]
-#              for t in ts]
-# u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-# diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1.estimated_nn_params[1]))))
-#            for x in xs]
-#           for t in ts]
-
-# # p1 = plot(xs, u_predict, title = "predict")
-# # p2 = plot(xs, u_real, title = "analytic")
-# # p3 = plot(xs, diff_u, title = "error")
-# # plot(p1, p2, p3)
-
-# phi = discretization.phi[1]
-# xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-#           for (d, dx) in zip(domains, [dx / 10, dt])]
-# u_predict = [[first(pmean(phi([x, t], sol2.estimated_nn_params[1]))) for x in xs]
-#              for t in ts]
-# u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-# diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol2.estimated_nn_params[1]))))
-#            for x in xs]
-#           for t in ts]
-
-# # p1 = plot(xs, u_predict, title = "predict")
-# # p2 = plot(xs, u_real, title = "analytic")
-# # p3 = plot(xs, diff_u, title = "error")
-# # plot(p1, p2, p3)
-
-@parameters t, p
-@variables u(..)
-
-Dt = Differential(t)
-eqs = Dt(u(t)) - cos(p * t) ~ 0
-bcs = [u(0) ~ 0.0]
-domains = [t ∈ Interval(0.0, 2.0)]
-
-chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
-initl, st = Lux.setup(Random.default_rng(), chainl)
-
-@named pde_system = PDESystem(eqs,
-    bcs,
-    domains,
-    [t],
-    [u(t)],
-    [p],
-    defaults = Dict([p => 4.0]))
-
-analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
-timepoints = collect(0.0:(1 / 100.0):2.0)
-u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
-u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
-dataset = [hcat(u1, timepoints)]
-
-discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
-    dataset = [dataset, nothing])
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 1500,
-    bcstd = [0.05],
-    phystd = [0.01], l2std = [0.01],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 50.0],
-    param = [LogNormal(4.0, 2)], progress = true)
-
-param = 2 * π
-ts = vec(sol1.timepoints[1])
-u_real = [analytic_sol_func1(0.0, t) for t in ts]
-u_predict = pmean(sol1.ensemblesol[1])
-
-@test u_predict≈u_real atol=0.1
-@test mean(u_predict .- u_real) < 0.01
-@test sol1.estimated_de_params[1]≈param atol=0.1
-sol1.estimated_de_params[1]
-
-eqs = pde_system.eqs
-Dict_differentials = Dict()
-exps = toexpr.(eqs)
-nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-sol2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 1500,
-    bcstd = [0.05],
-    phystd = [0.01], l2std = [0.02], phystdnew = [0.02],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 50.0],
-    param = [LogNormal(4.0, 2)],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-param = 2 * π
-ts_2 = vec(sol2.timepoints[1])
-u_real_2 = [analytic_sol_func1(0.0, t) for t in ts]
-u_predict_2 = pmean(sol2.ensemblesol[1])
-
-@test u_predict_2≈u_real_2 atol=0.1
-@test mean(u_predict_2 .- u_real_2) < 0.01
-@test sol2.estimated_de_params[1]≈param atol=0.1
-sol2.estimated_de_params[1]
-
-plot(ts_2, u_predict_2)
-plot!(ts_2, u_real_2)
-
-@parameters t, σ_
-@variables x(..), y(..), z(..)
-Dt = Differential(t)
-eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
-    Dt(y(t)) ~ x(t) * (28.0 - z(t)) - y(t),
-    Dt(z(t)) ~ x(t) * y(t) - 8 / 3 * z(t)]
-
-bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
-domains = [t ∈ Interval(0.0, 1.0)]
-
-input_ = length(domains)
-n = 7
-chain = [
-    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-        Lux.Dense(n, 1)),
-    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-        Lux.Dense(n, 1)),
-    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-        Lux.Dense(n, 1))
-]
-
-#Generate Data
-function lorenz!(du, u, p, t)
-    du[1] = 10.0 * (u[2] - u[1])
-    du[2] = u[1] * (28.0 - u[3]) - u[2]
-    du[3] = u[1] * u[2] - (8 / 3) * u[3]
-end
-
-u0 = [1.0; 0.0; 0.0]
-tspan = (0.0, 1.0)
-prob = ODEProblem(lorenz!, u0, tspan)
-sol = solve(prob, Tsit5(), dt = 0.01, saveat = 0.05)
-ts = sol.t
-us = hcat(sol.u...)
-us = us .+ ((0.05 .* randn(size(us))) .* us)
-ts_ = hcat(sol(ts).t...)[1, :]
-dataset = [hcat(us[i, :], ts_) for i in 1:3]
-
-discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true,
-    dataset = [dataset, nothing])
-
-@named pde_system = PDESystem(eqs, bcs, domains,
-    [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]]))
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 100,
-    bcstd = [0.3, 0.3, 0.3],
-    phystd = [0.1, 0.1, 0.1],
-    l2std = [1, 1, 1],
-    priorsNNw = (0.0, 1.0),
-    saveats = [0.01],
-    param = [Normal(14.0, 2)], progress = true)
-
-idealp = 10.0
-p_ = sol1.estimated_de_params[1]
-@test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
-# @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
-
-@parameters x y
-@variables u(..)
-Dxx = Differential(x)^2
-Dyy = Differential(y)^2
-
-# 2D PDE
-eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y)
-
-# Boundary conditions
-bcs = [u(0, y) ~ 0.0, u(1, y) ~ 0.0,
-    u(x, 0) ~ 0.0, u(x, 1) ~ 0.0]
-
-# Space and time domains
-domains = [x ∈ Interval(0.0, 1.0),
-    y ∈ Interval(0.0, 1.0)]
-
-# Neural network
-dim = 2 # number of dimensions
-chain = Lux.Chain(Lux.Dense(dim, 9, Lux.σ), Lux.Dense(9, 9, Lux.σ), Lux.Dense(9, 1))
-
-# Discretization
-dx = 0.04
-discretization = BayesianPINN([chain], GridTraining(dx), dataset = [[dataset], nothing])
-
-@named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
-
-eqs = pde_system.eqs
-Dict_differentials = Dict()
-exps = toexpr.(eqs)
-nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 5,
-    bcstd = [0.01, 0.01, 0.01, 0.01],
-    phystd = [0.005],
-    priorsNNw = (0.0, 2.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-xs = sol1.timepoints[1]
-sol1.ensemblesol[1]
-analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2)
-
-dataset = hcat(u_real, xs')
-u_predict = pmean(sol1.ensemblesol[1])
-u_real = [analytic_sol_func(xs[:, i][1], xs[:, i][2]) for i in 1:length(xs[1, :])]
-@test u_predict≈u_real atol=0.8
-
-using NeuralPDE, Lux, ModelingToolkit, LinearAlgebra, AdvancedHMC
-import ModelingToolkit: Interval, infimum, supremum, Distributions
-using Plots, MonteCarloMeasurements
-
-@parameters x t
-@variables u(..)
-
-Dt = Differential(t)
-Dx = Differential(x)
-Dxx = Dx^2
-α = 0.05
-# Burger's equation
-eq = Dt(u(t, x)) + u(t, x) * Dx(u(t, x)) - α * Dxx(u(t, x)) ~ 0
-
-# boundary conditions
-bcs = [
-    u(0.0, x) ~ -sin(π * x),
-    u(t, -1.0) ~ 0.0,
-    u(t, 1.0) ~ 0.0
-]
-
-domains = [t ∈ Interval(0.0, 1.0), x ∈ Interval(-1.0, 1.0)]
-
-# Neural network
-chain = Lux.Chain(Dense(2, 10, Lux.σ), Dense(10, 10, Lux.σ), Dense(10, 1))
-strategy = NeuralPDE.QuadratureTraining(; abstol = 1e-6, reltol = 1e-6, batch = 200)
-
-indvars = [t, x]
-depvars = [u(t, x)]
-@named pde_system = PDESystem(eq, bcs, domains, indvars, depvars)
-
-# KS EQUATION
-using NeuralPDE, Flux, Lux, ModelingToolkit, LinearAlgebra, AdvancedHMC
-import ModelingToolkit: Interval, infimum, supremum, Distributions
-using Plots, MonteCarloMeasurements, StatsPlots
-
-@parameters x, t, α
-@variables u(..)
-Dt = Differential(t)
-Dx = Differential(x)
-Dx2 = Differential(x)^2
-Dx3 = Differential(x)^3
-Dx4 = Differential(x)^4
-
-# α = 1
-β = 4
-γ = 1
-eq = Dt(u(x, t)) + u(x, t) * Dx(u(x, t)) + α * Dx2(u(x, t)) + β * Dx3(u(x, t)) + γ * Dx4(u(x, t)) ~ 0
-
-u_analytic(x, t; z = -x / 2 + t) = 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
-du(x, t; z = -x / 2 + t) = 15 / 2 * (tanh(z) + 1) * (3 * tanh(z) - 1) * sech(z)^2
-
-bcs = [u(x, 0) ~ u_analytic(x, 0),
-    u(-10, t) ~ u_analytic(-10, t),
-    u(10, t) ~ u_analytic(10, t),
-    Dx(u(-10, t)) ~ du(-10, t),
-    Dx(u(10, t)) ~ du(10, t)]
-
-# Space and time domains
-domains = [x ∈ Interval(-10.0, 10.0),
-    t ∈ Interval(0.0, 1.0)]
-
-# Discretization
-dx = 0.4;
-dt = 0.2;
-
-# Function to compute analytical solution at a specific point (x, t)
-function u_analytic_point(x, t)
-    z = -x / 2 + t
-    return 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
-end
-
-# Function to generate the dataset matrix
-function generate_dataset_matrix(domains, dx, dt, xlim, tlim)
-    x_values = xlim[1]:dx:xlim[2]
-    t_values = tlim[1]:dt:tlim[2]
-
-    dataset = []
-
-    for t in t_values
-        for x in x_values
-            u_value = u_analytic_point(x, t)
-            push!(dataset, [u_value, x, t])
-        end
-    end
-
-    return vcat([data' for data in dataset]...)
-end
-
-#   x_values = -10:dx:10
-#     t_values = 0.0:dt:1.0
-
-#     dataset = []
-
-#     for t in t_values
-#         for x in x_values
-#             u_value = u_analytic_point(x, t)
-#             push!(dataset, [u_value, x, t])
-#         end
-#     end
-# dataset
-#    pop= vcat([data' for data in dataset]...)
-
-datasetpde = [generate_dataset_matrix(domains, dx, dt, [-10,10], [0.0,1.0])]
-
-datasetpde_new = [generate_dataset_matrix(domains, dx, dt, [-10,0], [0.0,1.0])]
-
-# noise to dataset
-noisydataset = deepcopy(datasetpde)
-noisydataset[1][:, 1] = noisydataset[1][:, 1] .+ (randn(size(noisydataset[1][:, 1])) .* 0.8)
-
-noisydataset_new = deepcopy(datasetpde_new)
-noisydataset_new[1][:, 1] = noisydataset_new[1][:, 1] .+ (randn(size(noisydataset_new[1][:, 1])) .* 0.8)
-
-# a=discretization_new.dataset[1]
-
-plotly()
-plot(datasetpde[1][:, 2], datasetpde[1][:, 1], title = "Dataset from Analytical Solution")
-# scatter!(a[1][:, 2], a[1][:, 1])
-scatter!(noisydataset[1][:, 2], noisydataset[1][:, 1])
-
-plot(datasetpde[1][:, 2],datasetpde[1][:, 3], datasetpde[1][:, 1], title = "Dataset from Analytical Solution")
-scatter!(noisydataset[1][:, 2],noisydataset[1][:, 3], noisydataset[1][:, 1])
-
-plotly()
-plot(datasetpde_new[1][:, 2], datasetpde_new[1][:, 1], title = "Dataset from Analytical Solution")
-scatter!(noisydataset_new[1][:, 2], noisydataset_new[1][:, 1])
-
-plot(datasetpde_new[1][:, 2],datasetpde_new[1][:, 3], datasetpde_new[1][:, 1], title = "Dataset from Analytical Solution")
-scatter!(noisydataset_new[1][:, 2],noisydataset_new[1][:, 3], noisydataset_new[1][:, 1])
-
-noise_std = 1.4
-original_data = datasetpde[1][:, 1]
-original_std = std(original_data)
-ratio = noise_std / original_std
-
-
-using StatsPlots
-plot(sol1.original.mcmc_chain)
-plot(sol2.original.mcmc_chain)
-
-plot(sol0_new.original.mcmc_chain)
-plot(sol2_new.original.mcmc_chain)
-
-# Neural network
-chain = Lux.Chain(Lux.Dense(2, 8, Lux.tanh),
-    Lux.Dense(8, 8, Lux.tanh),
-    Lux.Dense(8, 1))
-
-chain_more = Lux.Chain(Lux.Dense(2, 10, Lux.tanh),
-    Lux.Dense(10, 10, Lux.tanh),
-    Lux.Dense(10, 1))
-# chain = Lux.Chain(Lux.Dense(2, 8, Lux.σ),
-#     Lux.Dense(8, 8, Lux.σ),
-#     Lux.Dense(8, 1))
-
-discretization = NeuralPDE.BayesianPINN([chain],
-    GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset, nothing])
-
-discretization_more = NeuralPDE.BayesianPINN([chain_more],
-    GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset, nothing])
-
-discretization_new = NeuralPDE.BayesianPINN([chain],
-    GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset_new, nothing])
-
-
-@named pde_system = PDESystem(eq,
-    bcs,
-    domains,
-    [x, t],
-    [u(x, t)],
-    [α],
-    defaults = Dict([α => 2.0]))
-
-eqs = pde_system.eqs
-Dict_differentials = Dict()
-exps = toexpr.(eqs)
-nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-Dict_differentials
-
-plot(sol1.original.mcmc_chain)
-meanplot(sol1.original.mcmc_chain)
-autocorplot(sol1.original.mcmc_chain)
-traceplot(sol1.original.mcmc_chain)
-
-plot(sol2.original.mcmc_chain)
-meanplot(sol2.original.mcmc_chain)
-autocorplot(sol2.original.mcmc_chain)
-traceplot(sol2.original.mcmc_chain)
-
-plot(sol0_new.original.mcmc_chain)
-meanplot(sol0_new.original.mcmc_chain)
-autocorplot(sol0_new.original.mcmc_chain)
-
-plot(sol2_new.original.mcmc_chain)
-meanplot(sol2_new.original.mcmc_chain)
-autocorplot(sol2_new.original.mcmc_chain)
-
-plot(sol3_new.original.mcmc_chain)
-meanplot(sol3_new.original.mcmc_chain)
-autocorplot(sol3_new.original.mcmc_chain)
-
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 80, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.5],
-    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-sol1_more = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_more;
-    draw_samples = 80, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.5],
-    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-sol1_1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 90, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew =  [0.7],
-    phystd = [0.7], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-sol2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 80, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    progress = true)
-
-sol2_more = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_more;
-    draw_samples = 80, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    progress = true)
-
-# julia> sol2 = ahmc_bayesian_pinn_pde(pde_system,
-#            discretization;
-#            draw_samples = 85, Kernel = AdvancedHMC.NUTS(0.8),
-#            bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-#            phystd = [0.7], l2std = [0.2], param = [Distributions.Normal(2.0,
-#            priorsNNw = (0.0, 3.0),
-#            saveats = [1 / 100.0, 1 / 100.0],
-#            progress = true)
-# ┌ Info: Current Physics Log-likelihood : 
-# └   ℓπ.full_loglikelihood(setparameters(ℓπ, initial_θ), ℓπ.allstd) = -415167
-# ┌ Info: Current Prior Log-likelihood : 
-# └   priorlogpdf(ℓπ, initial_θ) = -214.1825373360679
-# ┌ Info: Current MSE against dataset Log-likelihood :
-# └   L2LossData(ℓπ, initial_θ) = -109309.44973223892
-# Sampling 100%|███████████████████████████████| Time: 0:14:50
-#   iterations:                                   85
-#   ratio_divergent_transitions:                  0.0
-#   ratio_divergent_transitions_during_adaption:  0.02
-#   n_steps:                                      127
-#   is_accept:                                    true
-#   acceptance_rate:                              0.9982795867682919
-#   log_density:                                  -3832.934953640867
-#   hamiltonian_energy:                           4145.005901868316
-#   hamiltonian_energy_error:                     -0.07863051782624098
-#   max_hamiltonian_energy_error:                 -0.16790754244266282
-#   tree_depth:                                   7
-#   numerical_error:                              false
-#   step_size:                                    0.00018186972987192408
-#   nom_step_size:                                0.00018186972987192408
-#   is_adapt:                                     false
-#   mass_matrix:                                  DiagEuclideanMetric([1.0, 1.
-# [ Info: Sampling Complete.
-# ┌ Info: Current Physics Log-likelihood :
-# └   ℓπ.full_loglikelihood(setparameters(ℓπ, samples[end]), ℓπ.allstd) = -132
-# ┌ Info: Current Prior Log-likelihood :
-# └   priorlogpdf(ℓπ, samples[end]) = -219.17544656823006
-# ┌ Info: Current MSE against dataset Log-likelihood :
-# └   L2LossData(ℓπ, samples[end]) = -3481.509412470054
-
-# julia> sol1 = ahmc_bayesian_pinn_pde(pde_system,
-#            discretization;
-#            draw_samples = 90, Kernel = AdvancedHMC.NUTS(0.8),
-#            bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.7],
-#            phystd = [0.7], l2std = [0.2], param = [Distributions.Normal(2.0,
-#            priorsNNw = (0.0, 3.0),
-#            saveats = [1 / 100.0, 1 / 100.0],
-#            Dict_differentials = Dict_differentials,
-#            progress = true)
-# ┌ Info: Current Physics Log-likelihood : 
-# └   ℓπ.full_loglikelihood(setparameters(ℓπ, initial_θ), ℓπ.allstd) = -394622
-# ┌ Info: Current Prior Log-likelihood : 
-# └   priorlogpdf(ℓπ, initial_θ) = -214.1657203956881
-# ┌ Info: Current MSE against dataset Log-likelihood :
-# └   L2LossData(ℓπ, initial_θ) = -107600.2750860966
-# ┌ Info: Current L2_LOSSY : 
-# └   ℓπ.L2_loss2(setparameters(ℓπ, initial_θ), ℓπ.allstd) = -352.339686469935
-# Sampling 100%|███████████████████████████████| Time: 0:38:42
-#   iterations:                                   90
-#   ratio_divergent_transitions:                  0.24
-#   ratio_divergent_transitions_during_adaption:  0.02
-#   n_steps:                                      34
-#   is_accept:                                    true
-#   acceptance_rate:                              0.0755469536430885
-#   log_density:                                  -6535.135018473582
-#   hamiltonian_energy:                           6681.540376258076
-#   hamiltonian_energy_error:                     -1.7097735125544204
-#   max_hamiltonian_energy_error:                 1216.239238705054
-#   tree_depth:                                   5
-#   numerical_error:                              true
-#   step_size:                                    0.0004111092751764056
-#   nom_step_size:                                0.0004111092751764056
-#   is_adapt:                                     false
-#   mass_matrix:                                  DiagEuclideanMetric([1.0, 1.
-# [ Info: Sampling Complete.
-# ┌ Info: Current Physics Log-likelihood : 
-# └   ℓπ.full_loglikelihood(setparameters(ℓπ, samples[end]), ℓπ.allstd) = -272
-# ┌ Info: Current Prior Log-likelihood : 
-# └   priorlogpdf(ℓπ, samples[end]) = -218.6535874132563
-# ┌ Info: Current MSE against dataset Log-likelihood : 
-# └   L2LossData(ℓπ, samples[end]) = -3573.449092586736
-# ┌ Info: Current L2_LOSSY : 
-# └   ℓπ.L2_loss2(setparameters(ℓπ, samples[end]), ℓπ.allstd) = -2470.35523478
-
-using MCMCChains
-println(summarize(sol1.original.mcmc_chain))
-plot(sol1.original.mcmc_chain)
-
-sol3 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 100, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-    phystd = [0.7], l2std = [0.15], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 3.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    progress = true)
-
-sol4 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 80, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-    phystd = [0.7], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    progress = true)
-
-
-phi = discretization.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol1.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-
-phi = discretization.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol1_1.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1_1.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-
-phi = discretization.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol2.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol2.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-
-phi = discretization.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol3.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol3.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-
-phi = discretization.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol4.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol4.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-sol0_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 110, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.2],
-    phystd = [0.2], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-
-julia> sol5_new = ahmc_bayesian_pinn_pde(pde_system,
-           discretization_new;
-           draw_samples = 170, Kernel = AdvancedHMC.NUTS(0.8),
-           bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-           phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
-           priorsNNw = (0.0, 1.0),
-           saveats = [1 / 100.0, 1 / 100.0],
-           progress = true)
-
-sol1_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 110, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.2],
-    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-sol1_1_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 110, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew =  [0.5],
-    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-sol1_2_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 150, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew =  [0.1],
-    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 3.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-sol1_3_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 150, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew =  [0.2],
-    phystd = [0.3], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 3.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-sol2_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 140, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-    phystd = [0.2], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    progress = true)
-
-
-sol3_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 140, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    progress = true)
-
-sol4_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 160, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    progress = true)
-
-sol5_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 170, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    progress = true)
-
-# phi = discretization.phi[1]
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol0_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol0_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol1_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol1_1_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1_1_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol1_2_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1_2_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol1_3_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1_3_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol2_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol2_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol3_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol3_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol4_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol4_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol5_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol5_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol5_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol5_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-plotly()
-p1 = plot(ts, xs, u_predict, title = "predict")
-p2 = plot(ts, xs, u_real, title = "analytic")
-p3 = plot(ts, xs, diff_u, title = "error")
-plot(p1, p2, p3)
-# julia> sol0_new = ahmc_bayesian_pinn_pde(pde_system,
-#            discretization_new;
-#            draw_samples = 110, Kernel = AdvancedHMC.NUTS(0.8),
-#            bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.2],
-#            phystd = [0.2], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-#            priorsNNw = (0.0, 1.0),
-#            saveats = [1 / 100.0, 1 / 100.0],
-#            Dict_differentials = Dict_differentials,
-#            progress = true)
-# ┌ Info: Current Physics Log-likelihood :
-# └   ℓπ.full_loglikelihood(setparameters(ℓπ, initial_θ), ℓπ.allstd) = -398314.38213382766
-# ┌ Info: Current Prior Log-likelihood :
-# └   priorlogpdf(ℓπ, initial_θ) = -104.7365701596561
-# ┌ Info: Current MSE against dataset Log-likelihood :
-# └   L2LossData(ℓπ, initial_θ) = -58553.36940699288
-# ┌ Info: Current L2_LOSSY :
-# └   ℓπ.L2_loss2(setparameters(ℓπ, initial_θ), ℓπ.allstd) = -281.85131447737575
-# Sampling 100%|███████████████████████████████| Time: 0:26:00
-#   iterations:                                   110
-#   ratio_divergent_transitions:                  0.2
-#   ratio_divergent_transitions_during_adaption:  0.03
-#   n_steps:                                      11
-#   is_accept:                                    true
-#   acceptance_rate:                              0.0024891070448310416
-#   log_density:                                  -13158.729119075539
-#   hamiltonian_energy:                           13212.763613683248
-#   hamiltonian_energy_error:                     0.0
-#   max_hamiltonian_energy_error:                 1492.7356803165876
-#   tree_depth:                                   3
-#   numerical_error:                              true
-#   step_size:                                    0.0002145156661425442
-#   nom_step_size:                                0.0002145156661425442
-#   is_adapt:                                     false
-#   mass_matrix:                                  DiagEuclideanMetric([1.0, 1.0, 1.0, 1.0, 1.0, 1 ...])
-# [ Info: Sampling Complete.
-# ┌ Info: Current Physics Log-likelihood :
-# └   ℓπ.full_loglikelihood(setparameters(ℓπ, samples[end]), ℓπ.allstd) = -908.7769621441158
-# ┌ Info: Current Prior Log-likelihood :
-# └   priorlogpdf(ℓπ, samples[end]) = -136.87645881663929
-# ┌ Info: Current MSE against dataset Log-likelihood :
-# └   L2LossData(ℓπ, samples[end]) = -1404.7102059521355
-# ┌ Info: Current L2_LOSSY :
-# └   ℓπ.L2_loss2(setparameters(ℓπ, samples[end]), ℓπ.allstd) = -10708.363203924739
-
-# julia> sol2_new = ahmc_bayesian_pinn_pde(pde_system,
-#            discretization_new;
-#            draw_samples = 140, Kernel = AdvancedHMC.NUTS(0.8),
-#            bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-#            phystd = [0.2], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-#            priorsNNw = (0.0, 1.0),
-#            saveats = [1 / 100.0, 1 / 100.0],
-#            progress = true)
-# ┌ Info: Current Physics Log-likelihood :
-# └   ℓπ.full_loglikelihood(setparameters(ℓπ, initial_θ), ℓπ.allstd) = -397526.19267355377
-# ┌ Info: Current Prior Log-likelihood :
-# └   priorlogpdf(ℓπ, initial_θ) = -105.03439044100367
-# ┌ Info: Current MSE against dataset Log-likelihood :
-# └   L2LossData(ℓπ, initial_θ) = -60957.24454333089
-# Sampling  99%|███████████████████████████████|  ETA: 0:00:10
-#   iterations:                                   140
-#   ratio_divergent_transitions:                  0.0
-#   ratio_divergent_transitions_during_adaption:  0.01
-#   n_steps:                                      1023
-#   is_accept:                                    true
-#   acceptance_rate:                              0.972620625460237
-#   log_density:                                  -1513.1769839294327
-#   hamiltonian_energy:                           8709.204139640105
-#   hamiltonian_energy_error:                     -0.4925547801958601
-#   max_hamiltonian_energy_error:                 -1.7861646674082294
-#   tree_depth:                                   10
-#   numerical_error:                              false
-#   step_size:                                    0.00011428277138492957
-#   nom_step_size:                                0.00011428277138492957
-#   is_adapt:                                     false
-#   mass_matrix:                                  DiagEuclideanMetric([1.0, 1.0, 1.0, 1.0, 1.0, 1 ...])
-# [ Info: Sampling Complete.
-# ┌ Info: Current Physics Log-likelihood :
-# └   ℓπ.full_loglikelihood(setparameters(ℓπ, samples[end]), ℓπ.allstd) = 115.103823132341
-# ┌ Info: Current Prior Log-likelihood :
-# └   priorlogpdf(ℓπ, samples[end]) = -198.39103020815858
-# ┌ Info: Current MSE against dataset Log-likelihood :
-# └   L2LossData(ℓπ, samples[end]) = -1429.7843027541815
+@test sol1.estimated_de_params[1]≈param atol=param * 0.3
\ No newline at end of file
diff --git a/test/BPINN_pde_experimental.jl b/test/BPINN_pde_experimental.jl
new file mode 100644
index 0000000000..a8f4a0341e
--- /dev/null
+++ b/test/BPINN_pde_experimental.jl
@@ -0,0 +1,1669 @@
+using Test, MCMCChains, Lux, ModelingToolkit
+import ModelingToolkit: Interval, infimum, supremum
+using ForwardDiff, Distributions, OrdinaryDiffEq
+using AdvancedHMC, Statistics, Random, Functors
+using NeuralPDE, MonteCarloMeasurements
+using ComponentArrays, ModelingToolkit
+
+Random.seed!(100)
+
+# function required to use the new loss, creates a dicitonary of differntial operator terms
+function recur_expression(exp, Dict_differentials)
+    for in_exp in exp.args
+        if !(in_exp isa Expr)
+            # skip +,== symbols, characters etc
+            continue
+
+        elseif in_exp.args[1] isa ModelingToolkit.Differential
+            # first symbol of differential term
+            # Dict_differentials for masking differential terms
+            # and resubstituting differentials in equations after putting in interpolations
+            # temp = in_exp.args[end]
+            Dict_differentials[eval(in_exp)] = Symbolics.variable("diff_$(length(Dict_differentials) + 1)")
+            return
+        else
+            recur_expression(in_exp, Dict_differentials)
+        end
+    end
+end
+
+# experiments are here
+println("Example 3: Lotka Volterra with New parameter estimation")
+@parameters t α β γ δ
+@variables x(..) y(..)
+
+Dt = Differential(t)
+eqs = [Dt(x(t)) * α ~ x(t) - β * x(t) * y(t), Dt(y(t)) * δ ~ x(t) * y(t) - y(t) * γ]
+bcs = [x(0) ~ 1.0, y(0) ~ 1.0]
+domains = [t ∈ Interval(0.0, 7.0)]
+
+# Define the parameters' values
+# α, β, γ, δ = p
+
+# regular equations
+# dx = (1.5 - y) * x # prey
+# dy = (x - 3.0) * y # predator
+# p = [1.5, 1.0, 3.0, 1.0] non transformed values
+
+# transformed equations
+# dx*0.666 = (1 - 0.666 * y) * x # prey
+# dy*1.0 = (x - 3.0) * y # predator
+# p = [0.666, 0.666, 3.0, 1.0] transformed values (change is scale also ensured!)
+
+chainl = [
+    Lux.Chain(Lux.Dense(1, 5, sin), Lux.Dense(5, 5, sin), Lux.Dense(5, 1)),
+    Lux.Chain(Lux.Dense(1, 5, sin), Lux.Dense(5, 5, sin), Lux.Dense(5, 1))
+]
+
+initl, st = Lux.setup(Random.default_rng(), chainl[1])
+initl1, st1 = Lux.setup(Random.default_rng(), chainl[2])
+
+using NeuralPDE, Lux, OrdinaryDiffEq, Distributions, Random
+
+function lotka_volterra(u, p, t)
+    # Model parameters. 
+    α, β, γ, δ = p
+    # Current state.
+    x, y = u
+
+    # Evaluate differential equations.
+    dx = (α - β * y) * x # prey
+    dy = (δ * x - γ) * y # predator
+
+    return [dx, dy]
+end
+# initial-value problem.
+u0 = [1.0, 1.0]
+# p = [2/3, 2/3, 1/3.0, 1/3.0]
+p = [1.5, 1.0, 3.0, 1.0]
+tspan = (0.0, 7.0)
+prob = ODEProblem(lotka_volterra, u0, tspan, p)
+dt = 0.01
+solution = solve(prob, Tsit5(); saveat = dt)
+solution1 = solve(prob, Tsit5(); saveat = 0.02)
+
+function calculate_errors(approx_sol, solution_points)
+    # Check vector lengths match
+    if length(approx_sol) != length(solution_points)
+        error("Vectors must have the same length")
+    end
+
+    # Calculate errors
+    n = length(approx_sol)
+    errors = randn(n)
+    for i in 1:n
+        errors[i] = solution_points[i] - approx_sol[i]
+    end
+
+    # Calculate RMSE
+    rmse = sqrt(mean(errors .^ 2))
+
+    # Calculate MAE
+    mae = mean(abs.(errors))
+
+    # Calculate maximum absolute error
+    max_error = maximum(abs.(errors))
+
+    # Return dictionary with errors
+    return Dict(
+        "RMSE" => rmse,
+        "MAE" => mae,
+        "Max Abs Error" => max_error
+    )
+end
+u = hcat(solution1.u...)
+
+a1 = calculate_errors(pmean(sol6_1.ensemblesol[1]), u1[1, :])
+b1 = calculate_errors(pmean(sol6_1.ensemblesol[2]), u1[2, :])
+
+a = calculate_errors(pmean(sol6_2.ensemblesol[1]), u[1, :])
+b = calculate_errors(pmean(sol6_2.ensemblesol[2]), u[2, :])
+
+c = calculate_errors(pmean(sol6_L2_2.ensemblesol[1]), u[1, :])
+d = calculate_errors(pmean(sol6_L2_2.ensemblesol[2]), u[2, :])
+
+e = calculate_errors(pmean(sol6_L2_1.ensemblesol[1]), u[1, :])
+f = calculate_errors(pmean(sol6_L2_1.ensemblesol[2]), u[2, :])
+
+g = calculate_errors(pmean(sol6_L2.ensemblesol[1]), u[1, :])
+h = calculate_errors(pmean(sol6_L2.ensemblesol[2]), u[2, :])
+
+# function moving_average_smoothing(data::Vector{T}, window_size::Int) where {T}
+#     smoothed_data = similar(data, T, length(data))
+
+#     for i in 1:length(data)
+#         start_idx = max(1, i - window_size)
+#         end_idx = min(length(data), i + window_size)
+#         smoothed_data[i] = mean(data[start_idx:end_idx])
+#     end
+
+#     return smoothed_data'
+# end
+
+# Extract solution
+time = solution.t
+u = hcat(solution.u...)
+time1 = solution.t
+u_noisy = u .+ u .* (0.2 .* randn(size(u)))
+u_noisy0 = u .+ (3.0 .* rand(size(u)[1], size(u)[2]) .- 1.5)
+u_noisy1 = u .+ (0.8 .* randn(size(Array(solution))))
+u_noisy2 = u .+ (0.5 .* randn(size(Array(solution))))
+
+plot(time, u[1, :])
+plot!(time, u[2, :])
+scatter!(time1, u_noisy0[1, :])
+scatter!(time1, u_noisy0[2, :])
+scatter!(discretization_08_gaussian.dataset[1][1][:, 2],
+    discretization_08_gaussian.dataset[1][1][:, 1])
+scatter!(discretization_08_gaussian.dataset[1][2][:, 2],
+    discretization_08_gaussian.dataset[1][2][:, 1])
+
+scatter!(discretization_05_gaussian.dataset[1][1][:, 2],
+    discretization_05_gaussian.dataset[1][1][:, 1])
+scatter!(discretization_05_gaussian.dataset[1][2][:, 2],
+    discretization_05_gaussian.dataset[1][2][:, 1])
+# discretization_05_gaussian.dataset[1][1][:,2]
+# window_size = 5
+# smoothed_datasets = [moving_average_smoothing(u1[i, :], window_size)
+#                      for i in 1:length(solution.u[1])]
+# u2 = vcat(smoothed_datasets[1], smoothed_datasets[2])
+
+# Randomly select some points from the solution
+num_points = 100  # Number of points to select
+selected_indices = rand(1:size(u_noisy1, 2), num_points)
+upoints = [u_noisy1[:, i] for i in selected_indices]
+timepoints = [time[i] for i in selected_indices]
+temp = hcat(upoints...)
+dataset = [hcat(temp[i, :], timepoints) for i in 1:2]
+
+discretization_uniform = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
+    dataset = [dataset, nothing])
+discretization_08_gaussian = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
+    dataset = [dataset, nothing])
+discretization_05_gaussian = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
+    dataset = [dataset, nothing])
+
+discretization1 = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
+    dataset = [dataset, nothing])
+
+scatter!(discretization.dataset[1][1][:, 2], discretization.dataset[1][1][:, 1])
+scatter!(discretization.dataset[1][2][:, 2], discretization.dataset[1][2][:, 1])
+
+sol = solve(prob, Tsit5(); saveat = 0.1)
+odedata = Array(sol) + 0.8 * randn(size(Array(sol)))
+
+@named pde_system = PDESystem(eqs,
+    bcs,
+    domains,
+    [t],
+    [x(t), y(t)],
+    [α, β, γ, δ],
+    defaults = Dict([α => 2, β => 2, γ => 2, δ => 2]))
+
+# creating dictionary for masking equations
+eqs = pde_system.eqs
+Dict_differentials = Dict()
+exps = toexpr.(eqs)
+nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+sol3 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_uniform = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_uniform;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_08_gaussian = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_08_gaussian;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_05_gaussian = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_05_gaussian;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+# more iterations for above
+sol3_100_uniform_1000 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_uniform;
+    draw_samples = 1000,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_08_gaussian_1000 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_08_gaussian;
+    draw_samples = 1000,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_05_gaussian_1000 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_05_gaussian;
+    draw_samples = 1000,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+# more iterations for above + strict BC
+sol3_100_uniform_1000_bc = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_uniform;
+    draw_samples = 1000,
+    bcstd = [0.1, 0.1],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_08_gaussian_1000_bc = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_08_gaussian;
+    draw_samples = 1000,
+    bcstd = [0.1, 0.1],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_08_gaussian_1000_bc_hard = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_08_gaussian;
+    draw_samples = 1000,
+    bcstd = [0.05, 0.05],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_05_gaussian_1000_bc = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_05_gaussian;
+    draw_samples = 1000,
+    bcstd = [0.1, 0.1],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol3_100_08_gaussian_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_08_gaussian;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol3_100_05_gaussian_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_05_gaussian;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol4 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true
+)
+
+sol4_0 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true
+)
+
+sol5_00 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol5_0 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.2, 0.2],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol5 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+# 70 points in dataset
+sol6 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.2, 0.2],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+# SOL6_1 VS SOL6_L2
+sol6_1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol6_2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol6_2_L2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol6_3 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol6_4 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.2, 0.2],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol6_L2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.2, 0.2],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol6_L2_1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+sol6_L2_2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization1;
+    draw_samples = 700,
+    bcstd = [0.05, 0.05],
+    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2),
+        Normal(2, 2)
+    ], progress = true)
+
+# 50 datapoint 0-5 sol5 vs sol4
+# julia> sol4.estimated_de_params
+# 4-element Vector{Particles{Float64, 234}}:
+#  0.549 ± 0.0058
+#  0.71 ± 0.0042
+#  0.408 ± 0.0063
+#  0.355 ± 0.0015
+
+# julia> sol5.estimated_de_params
+# 4-element Vector{Particles{Float64, 234}}:
+#  0.604 ± 0.0052
+#  0.702 ± 0.0034
+#  0.346 ± 0.0037
+#  0.335 ± 0.0013
+
+# 100 datapoint 0-5 sol5_2 vs sol3
+# julia> sol3.estimated_de_params
+# 4-element Vector{Particles{Float64, 234}}:
+#  0.598 ± 0.0037
+#  0.711 ± 0.0027
+#  0.399 ± 0.0032
+#  0.333 ± 0.0011
+
+# julia> sol5_2.estimated_de_params
+# 4-element Vector{Particles{Float64, 234}}:
+#  0.604 ± 0.0035
+#  0.686 ± 0.0026
+#  0.395 ± 0.0029
+#  0.328 ± 0.00095
+
+# timespan for full dataset (0-8)
+sol6 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1, 2),
+        Normal(1, 1),
+        Normal(1, 2),
+        Normal(1, 1)
+    ], progress = true)
+
+sol5_3 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1, 2),
+        Normal(1, 1),
+        Normal(1, 2),
+        Normal(1, 1)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol5_4 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.2, 0.2],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1, 2),
+        Normal(1, 1),
+        Normal(1, 2),
+        Normal(1, 1)
+    ], Dict_differentials = Dict_differentials, progress = true
+)
+
+sol5_5 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1, 2),
+        Normal(1, 1),
+        Normal(1, 2),
+        Normal(1, 1)
+    ], progress = true
+)
+
+sol7 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(1, 2),
+        Normal(1, 1),
+        Normal(1, 2),
+        Normal(1, 1)
+    ], Dict_differentials = Dict_differentials, progress = true)
+
+sol5_5_1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], progress = true
+)
+
+sol7_1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true)
+
+sol7_2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.1, 0.1],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true)
+
+sol7_3 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.2, 0.2],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true)
+
+sol7_4 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 700,
+    bcstd = [0.1, 0.1],
+    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
+    priorsNNw = (0.0, 5.0),
+    phystdnew = [0.3, 0.3],
+    saveats = [1 / 50.0],
+    param = [
+        Normal(2, 2),
+        Normal(2, 1),
+        Normal(2, 2),
+        Normal(2, 1)
+    ], Dict_differentials = Dict_differentials, progress = true)
+
+using Plots, StatsPlots
+plotly()
+plot(time, u[1, :])
+plot!(time, u[2, :])
+scatter!(time, u_noisy[1, :])
+scatter!(time, u_noisy[2, :])
+scatter!(discretization.dataset[1][1][:, 2], discretization.dataset[1][1][:, 1])
+scatter!(discretization.dataset[1][2][:, 2], discretization.dataset[1][2][:, 1])
+
+scatter!(discretization1.dataset[1][1][:, 2],
+    discretization1.dataset[1][1][:, 1], legend = nothing)
+scatter!(discretization1.dataset[1][2][:, 2], discretization1.dataset[1][2][:, 1])
+
+# plot28(sol4 seems better vs sol3 plots, params seems similar)
+plot!(sol3.timepoints[1]', sol3.ensemblesol[1])
+plot!(sol3.timepoints[2]', sol3.ensemblesol[2])
+plot!(sol3_0.timepoints[1]', sol3_0.ensemblesol[1])
+plot!(sol3_0.timepoints[2]', sol3_0.ensemblesol[2])
+
+plot!(sol4.timepoints[1]', sol4.ensemblesol[1])
+plot!(sol4.timepoints[2]', sol4.ensemblesol[2])
+plot!(sol4_0.timepoints[1]', sol4_0.ensemblesol[1])
+plot!(sol4_0.timepoints[2]', sol4_0.ensemblesol[2])
+
+plot!(sol4_2.timepoints[1]', sol4_2.ensemblesol[1], legend = nothing)
+plot!(sol4_2.timepoints[2]', sol4_2.ensemblesol[2])
+plot!(sol5_2.timepoints[1]', sol5_2.ensemblesol[1], legend = nothing)
+plot!(sol5_2.timepoints[2]', sol5_2.ensemblesol[2])
+
+plot!(sol4_3.timepoints[1]', sol4_3.ensemblesol[1], legend = nothing)
+plot!(sol4_3.timepoints[2]', sol4_3.ensemblesol[2])
+plot!(sol5_3.timepoints[1]', sol5_3.ensemblesol[1])
+plot!(sol5_3.timepoints[2]', sol5_3.ensemblesol[2])
+plot!(sol5_4.timepoints[1]', sol5_4.ensemblesol[1], legend = nothing)
+plot!(sol5_4.timepoints[2]', sol5_4.ensemblesol[2])
+
+# plot 36 sol4 vs sol5(params sol4 better, but plots sol5 "looks" better),plot 44(sol5 better than sol6 overall)
+plot!(sol5.timepoints[1]', sol5.ensemblesol[1], legend = nothing)
+plot!(sol5.timepoints[2]', sol5.ensemblesol[2])
+
+plot!(sol5_0.timepoints[1]', sol5_0.ensemblesol[1], legend = nothing)
+plot!(sol5_0.timepoints[2]', sol5_0.ensemblesol[2])
+
+plot!(sol5_00.timepoints[1]', sol5_00.ensemblesol[1], legend = nothing)
+plot!(sol5_00.timepoints[2]', sol5_00.ensemblesol[2])
+
+plot!(sol6.timepoints[1]', sol6.ensemblesol[1])
+plot!(sol6.timepoints[2]', sol6.ensemblesol[2])
+plot!(sol6_L2.timepoints[1]', sol6_L2.ensemblesol[1])
+plot!(sol6_L2.timepoints[2]', sol6_L2.ensemblesol[2])
+
+plot!(sol6_L2_1.timepoints[1]', sol6_L2_1.ensemblesol[1])
+plot!(sol6_L2_1.timepoints[2]', sol6_L2_1.ensemblesol[2])
+
+plot!(sol6_L2_2.timepoints[1]', sol6_L2_2.ensemblesol[1])
+plot!(sol6_L2_2.timepoints[2]', sol6_L2_2.ensemblesol[2])
+
+plot!(sol6_1.timepoints[1]', sol6_1.ensemblesol[1])
+plot!(sol6_1.timepoints[2]', sol6_1.ensemblesol[2])
+plot!(sol6_2.timepoints[1]', sol6_2.ensemblesol[1])
+plot!(sol6_2.timepoints[2]', sol6_2.ensemblesol[2], legend = nothing)
+plot!(sol6_2_L2.timepoints[1]', sol6_2_L2.ensemblesol[1])
+plot!(sol6_2_L2.timepoints[2]', sol6_2_L2.ensemblesol[2], legend = nothing)
+
+# plot52 sol7 vs sol5(sol5 overall better plots, params?)
+plot!(sol7.timepoints[1]', sol7.ensemblesol[1])
+plot!(sol7.timepoints[2]', sol7.ensemblesol[2])
+
+# sol8,sol8_2,sol9,sol9_2 bad
+plot!(sol8.timepoints[1]', sol8.ensemblesol[1])
+plot!(sol8.timepoints[2]', sol8.ensemblesol[2])
+plot!(sol8_2.timepoints[1]', sol8_2.ensemblesol[1])
+plot!(sol8_2.timepoints[2]', sol8_2.ensemblesol[2])
+
+plot!(sol9.timepoints[1]', sol9.ensemblesol[1])
+plot!(sol9.timepoints[2]', sol9.ensemblesol[2])
+plot!(sol9_2.timepoints[1]', sol9_2.ensemblesol[1])
+plot!(sol9_2.timepoints[2]', sol9_2.ensemblesol[2])
+
+plot!(sol5_5.timepoints[1]', sol5_5.ensemblesol[1])
+plot!(sol5_5.timepoints[2]', sol5_5.ensemblesol[2], legend = nothing)
+
+plot!(sol5_5_1.timepoints[1]', sol5_5_1.ensemblesol[1])
+plot!(sol5_5_1.timepoints[2]', sol5_5_1.ensemblesol[2], legend = nothing)
+plot!(sol7_1.timepoints[1]', sol7_1.ensemblesol[1])
+plot!(sol7_1.timepoints[2]', sol7_1.ensemblesol[2])
+
+plot!(sol7_4.timepoints[1]', sol7_4.ensemblesol[1])
+plot!(sol7_4.timepoints[2]', sol7_4.ensemblesol[2])
+
+plot!(sol5_2_1.timepoints[1]', sol5_2_1.ensemblesol[1], legend = nothing)
+plot!(sol5_2_1.timepoints[2]', sol5_2_1.ensemblesol[2])
+plot!(sol5_2_2.timepoints[1]', sol5_2_2.ensemblesol[1], legend = nothing)
+plot!(sol5_2_2.timepoints[2]', sol5_2_2.ensemblesol[2])
+
+plot!(sol5_0.timepoints[1]', sol5_0.ensemblesol[1])
+plot!(sol5_0.timepoints[2]', sol5_0.ensemblesol[2], legend = nothing)
+
+plot!(sol5_00.timepoints[1]', sol5_00.ensemblesol[1], legend = nothing)
+plot!(sol5_00.timepoints[2]', sol5_00.ensemblesol[2])
+
+plot!(sol3_0.timepoints[1]', sol3_0.ensemblesol[1])
+plot!(sol3_0.timepoints[2]', sol3_0.ensemblesol[2], legend = nothing)
+plot!(sol4_0.timepoints[1]', sol4_0.ensemblesol[1])
+plot!(sol4_0.timepoints[2]', sol4_0.ensemblesol[2], legend = nothing)
+
+plot!(sol3_100_05_gaussian.timepoints[1]', sol3_100_05_gaussian.ensemblesol[1])
+plot!(sol3_100_05_gaussian.timepoints[2]',
+    sol3_100_05_gaussian.ensemblesol[2], legend = nothing)
+
+plot!(sol3_100_05_gaussian_new.timepoints[1]', sol3_100_05_gaussian_new.ensemblesol[1])
+plot!(sol3_100_05_gaussian_new.timepoints[2]', sol3_100_05_gaussian_new.ensemblesol[2])
+
+plot!(sol3_100_08_gaussian.timepoints[1]', sol3_100_08_gaussian.ensemblesol[1])
+plot!(sol3_100_08_gaussian.timepoints[2]', sol3_100_08_gaussian.ensemblesol[2])
+
+plot!(sol3_100_08_gaussian_new.timepoints[1]', sol3_100_08_gaussian_new.ensemblesol[1])
+plot!(sol3_100_08_gaussian_new.timepoints[2]',
+    sol3_100_08_gaussian_new.ensemblesol[2], legend = nothing)
+
+plot!(sol3_100_uniform.timepoints[1]', sol3_100_uniform.ensemblesol[1])
+plot!(sol3_100_uniform.timepoints[2]', sol3_100_uniform.ensemblesol[2])
+
+plot!(sol3_100_08_gaussian_1000.timepoints[1]', sol3_100_08_gaussian_1000.ensemblesol[1])
+plot!(sol3_100_08_gaussian_1000.timepoints[2]', sol3_100_08_gaussian_1000.ensemblesol[2])
+
+plot!(sol3_100_05_gaussian_1000.timepoints[1]', sol3_100_05_gaussian_1000.ensemblesol[1])
+plot!(sol3_100_05_gaussian_1000.timepoints[2]', sol3_100_05_gaussian_1000.ensemblesol[2])
+
+plot!(sol3_100_uniform_1000.timepoints[1]', sol3_100_uniform_1000.ensemblesol[1])
+plot!(sol3_100_uniform_1000.timepoints[2]', sol3_100_uniform_1000.ensemblesol[2])
+
+plot!(sol3_100_08_gaussian_1000_bc.timepoints[1]',
+    sol3_100_08_gaussian_1000_bc.ensemblesol[1])
+plot!(sol3_100_08_gaussian_1000_bc.timepoints[2]',
+    sol3_100_08_gaussian_1000_bc.ensemblesol[2])
+
+# test with lower number of points
+# consider full range dataset case
+# combination of all above
+
+# run 1 100 iters
+sol5.estimated_de_params
+sol6.estimated_de_params
+
+# run 2 200 iters
+sol5.estimated_de_params
+sol6.estimated_de_params
+
+# run 2 200 iters
+sol3.estimated_de_params
+sol4.estimated_de_params
+
+# p = [2/3, 2/3, 1/3, 1/3]
+sol3.estimated_de_params
+sol4.estimated_de_params
+
+@parameters t, p
+@variables u(..)
+
+Dt = Differential(t)
+eqs = Dt(u(t)) - cos(p * t) ~ 0
+bcs = [u(0) ~ 0.0]
+domains = [t ∈ Interval(0.0, 2.0)]
+
+chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
+initl, st = Lux.setup(Random.default_rng(), chainl)
+
+@named pde_system = PDESystem(eqs,
+    bcs,
+    domains,
+    [t],
+    [u(t)],
+    [p],
+    defaults = Dict([p => 4.0]))
+
+analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
+timepoints = collect(0.0:(1 / 100.0):2.0)
+u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
+u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
+dataset = [hcat(u1, timepoints)]
+
+discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
+    dataset = [dataset, nothing])
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 1500,
+    bcstd = [0.05],
+    phystd = [0.01], l2std = [0.01],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 50.0],
+    param = [LogNormal(4.0, 2)], progress = true)
+
+param = 2 * π
+ts = vec(sol1.timepoints[1])
+u_real = [analytic_sol_func1(0.0, t) for t in ts]
+u_predict = pmean(sol1.ensemblesol[1])
+
+@test u_predict≈u_real atol=0.1
+@test mean(u_predict .- u_real) < 0.01
+@test sol1.estimated_de_params[1]≈param atol=0.1
+sol1.estimated_de_params[1]
+
+eqs = pde_system.eqs
+Dict_differentials = Dict()
+exps = toexpr.(eqs)
+nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+sol2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 1500,
+    bcstd = [0.05],
+    phystd = [0.01], l2std = [0.02], phystdnew = [0.02],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 50.0],
+    param = [LogNormal(4.0, 2)],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+param = 2 * π
+ts_2 = vec(sol2.timepoints[1])
+u_real_2 = [analytic_sol_func1(0.0, t) for t in ts]
+u_predict_2 = pmean(sol2.ensemblesol[1])
+
+@test u_predict_2≈u_real_2 atol=0.1
+@test mean(u_predict_2 .- u_real_2) < 0.01
+@test sol2.estimated_de_params[1]≈param atol=0.1
+sol2.estimated_de_params[1]
+
+plot(ts_2, u_predict_2)
+plot!(ts_2, u_real_2)
+
+@parameters t, σ_
+@variables x(..), y(..), z(..)
+Dt = Differential(t)
+eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
+    Dt(y(t)) ~ x(t) * (28.0 - z(t)) - y(t),
+    Dt(z(t)) ~ x(t) * y(t) - 8 / 3 * z(t)]
+
+bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
+domains = [t ∈ Interval(0.0, 1.0)]
+
+input_ = length(domains)
+n = 7
+chain = [
+    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+        Lux.Dense(n, 1)),
+    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+        Lux.Dense(n, 1)),
+    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
+        Lux.Dense(n, 1))
+]
+
+#Generate Data
+function lorenz!(du, u, p, t)
+    du[1] = 10.0 * (u[2] - u[1])
+    du[2] = u[1] * (28.0 - u[3]) - u[2]
+    du[3] = u[1] * u[2] - (8 / 3) * u[3]
+end
+
+u0 = [1.0; 0.0; 0.0]
+tspan = (0.0, 1.0)
+prob = ODEProblem(lorenz!, u0, tspan)
+sol = solve(prob, Tsit5(), dt = 0.01, saveat = 0.05)
+ts = sol.t
+us = hcat(sol.u...)
+us = us .+ ((0.05 .* randn(size(us))) .* us)
+ts_ = hcat(sol(ts).t...)[1, :]
+dataset = [hcat(us[i, :], ts_) for i in 1:3]
+
+discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true,
+    dataset = [dataset, nothing])
+
+@named pde_system = PDESystem(eqs, bcs, domains,
+    [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]]))
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 100,
+    bcstd = [0.3, 0.3, 0.3],
+    phystd = [0.1, 0.1, 0.1],
+    l2std = [1, 1, 1],
+    priorsNNw = (0.0, 1.0),
+    saveats = [0.01],
+    param = [Normal(14.0, 2)], progress = true)
+
+idealp = 10.0
+p_ = sol1.estimated_de_params[1]
+@test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
+# @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
+
+@parameters x y
+@variables u(..)
+Dxx = Differential(x)^2
+Dyy = Differential(y)^2
+
+# 2D PDE
+eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y)
+
+# Boundary conditions
+bcs = [u(0, y) ~ 0.0, u(1, y) ~ 0.0,
+    u(x, 0) ~ 0.0, u(x, 1) ~ 0.0]
+
+# Space and time domains
+domains = [x ∈ Interval(0.0, 1.0),
+    y ∈ Interval(0.0, 1.0)]
+
+# Neural network
+dim = 2 # number of dimensions
+chain = Lux.Chain(Lux.Dense(dim, 9, Lux.σ), Lux.Dense(9, 9, Lux.σ), Lux.Dense(9, 1))
+
+# Discretization
+dx = 0.04
+discretization = BayesianPINN([chain], GridTraining(dx), dataset = [[dataset], nothing])
+
+@named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
+
+eqs = pde_system.eqs
+Dict_differentials = Dict()
+exps = toexpr.(eqs)
+nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 5,
+    bcstd = [0.01, 0.01, 0.01, 0.01],
+    phystd = [0.005],
+    priorsNNw = (0.0, 2.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+xs = sol1.timepoints[1]
+sol1.ensemblesol[1]
+analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2)
+
+dataset = hcat(u_real, xs')
+u_predict = pmean(sol1.ensemblesol[1])
+u_real = [analytic_sol_func(xs[:, i][1], xs[:, i][2]) for i in 1:length(xs[1, :])]
+@test u_predict≈u_real atol=0.8
+
+# KS EQUATION
+using NeuralPDE, Flux, Lux, ModelingToolkit, LinearAlgebra, AdvancedHMC
+import ModelingToolkit: Interval, infimum, supremum, Distributions
+using Plots, MonteCarloMeasurements, StatsPlots
+# plotly()
+
+@parameters x, t, α
+@variables u(..)
+Dt = Differential(t)
+Dx = Differential(x)
+Dx2 = Differential(x)^2
+Dx3 = Differential(x)^3
+Dx4 = Differential(x)^4
+
+# α = 1
+β = 4
+γ = 1
+eq = Dt(u(x, t)) + u(x, t) * Dx(u(x, t)) + α * Dx2(u(x, t)) + β * Dx3(u(x, t)) + γ * Dx4(u(x, t)) ~ 0
+
+u_analytic(x, t; z = -x / 2 + t) = 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
+du(x, t; z = -x / 2 + t) = 15 / 2 * (tanh(z) + 1) * (3 * tanh(z) - 1) * sech(z)^2
+
+bcs = [u(x, 0) ~ u_analytic(x, 0),
+    u(-10, t) ~ u_analytic(-10, t),
+    u(10, t) ~ u_analytic(10, t),
+    Dx(u(-10, t)) ~ du(-10, t),
+    Dx(u(10, t)) ~ du(10, t)]
+
+# Space and time domains
+domains = [x ∈ Interval(-10.0, 10.0),
+    t ∈ Interval(0.0, 1.0)]
+
+# Discretization
+dx = 0.4;
+dt = 0.2;
+
+# Function to compute analytical solution at a specific point (x, t)
+function u_analytic_point(x, t)
+    z = -x / 2 + t
+    return 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
+end
+
+# Function to generate the dataset matrix
+function generate_dataset_matrix(domains, dx, dt, xlim, tlim)
+    x_values = xlim[1]:dx:xlim[2]
+    t_values = tlim[1]:dt:tlim[2]
+
+    dataset = []
+
+    for t in t_values
+        for x in x_values
+            u_value = u_analytic_point(x, t)
+            push!(dataset, [u_value, x, t])
+        end
+    end
+
+    return vcat([data' for data in dataset]...)
+end
+
+datasetpde = [generate_dataset_matrix(domains, dx, dt, [-10, 10], [0.0, 1.0])]
+datasetpde_new = [generate_dataset_matrix(domains, dx, dt, [-10, 0], [0.0, 1.0])]
+
+# noise to dataset
+noisydataset = deepcopy(datasetpde)
+noisydataset[1][:, 1] = noisydataset[1][:, 1] .+ (randn(size(noisydataset[1][:, 1])) .* 0.8)
+
+noisydataset_new = deepcopy(datasetpde_new)
+noisydataset_new[1][:, 1] = noisydataset_new[1][:, 1] .+
+                            (randn(size(noisydataset_new[1][:, 1])) .* 0.8)
+
+plot(datasetpde[1][:, 2], datasetpde[1][:, 1], title = "Dataset from Analytical Solution")
+scatter!(noisydataset[1][:, 2], noisydataset[1][:, 1])
+
+plot(datasetpde[1][:, 2], datasetpde[1][:, 3], datasetpde[1][:, 1],
+    title = "Dataset from Analytical Solution")
+scatter!(noisydataset[1][:, 2], noisydataset[1][:, 3], noisydataset[1][:, 1])
+
+plot(datasetpde_new[1][:, 2], datasetpde_new[1][:, 1],
+    title = "Dataset from Analytical Solution")
+scatter!(noisydataset_new[1][:, 2], noisydataset_new[1][:, 1])
+
+plot(datasetpde_new[1][:, 2], datasetpde_new[1][:, 3],
+    datasetpde_new[1][:, 1], title = "Dataset from Analytical Solution")
+scatter!(noisydataset_new[1][:, 2], noisydataset_new[1][:, 3], noisydataset_new[1][:, 1])
+
+noise_std = 1.4
+original_data = datasetpde[1][:, 1]
+original_std = std(original_data)
+ratio = noise_std / original_std
+
+# Neural network
+chain = Lux.Chain(Lux.Dense(2, 8, Lux.tanh),
+    Lux.Dense(8, 8, Lux.tanh),
+    Lux.Dense(8, 1))
+
+discretization = NeuralPDE.BayesianPINN([chain],
+    GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset, nothing])
+
+discretization_new = NeuralPDE.BayesianPINN([chain],
+    GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset_new, nothing])
+
+@named pde_system = PDESystem(eq,
+    bcs,
+    domains,
+    [x, t],
+    [u(x, t)],
+    [α],
+    defaults = Dict([α => 2.0]))
+
+eqs = pde_system.eqs
+Dict_differentials = Dict()
+exps = toexpr.(eqs)
+nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+sol1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 80, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.5],
+    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+sol1_1 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 90, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.7],
+    phystd = [0.7], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+sol2 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 80, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    progress = true)
+
+sol3 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 100, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+    phystd = [0.7], l2std = [0.15], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 3.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    progress = true)
+
+sol4 = ahmc_bayesian_pinn_pde(pde_system,
+    discretization;
+    draw_samples = 80, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+    phystd = [0.7], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    progress = true)
+
+phi = discretization.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol1.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+phi = discretization.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol1_1.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1_1.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+phi = discretization.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol2.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol2.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+phi = discretization.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol3.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol3.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+phi = discretization.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol4.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol4.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+sol0_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 110, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.2],
+    phystd = [0.2], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+julia > sol5_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 170, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    progress = true)
+
+sol1_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 110, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.2],
+    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+sol1_1_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 110, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.5],
+    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+sol1_2_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 150, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.1],
+    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 3.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+sol1_3_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 150, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.2],
+    phystd = [0.3], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 3.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    Dict_differentials = Dict_differentials,
+    progress = true)
+
+sol2_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 140, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+    phystd = [0.2], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    progress = true)
+
+sol3_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 140, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    progress = true)
+
+sol4_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 160, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    progress = true)
+
+sol5_new = ahmc_bayesian_pinn_pde(pde_system,
+    discretization_new;
+    draw_samples = 170, Kernel = AdvancedHMC.NUTS(0.8),
+    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
+    priorsNNw = (0.0, 1.0),
+    saveats = [1 / 100.0, 1 / 100.0],
+    progress = true)
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol0_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) -
+               first(pmean(phi([x, t], sol0_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol1_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) -
+               first(pmean(phi([x, t], sol1_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol1_1_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) -
+               first(pmean(phi([x, t], sol1_1_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol1_2_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) -
+               first(pmean(phi([x, t], sol1_2_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol1_3_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) -
+               first(pmean(phi([x, t], sol1_3_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol2_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) -
+               first(pmean(phi([x, t], sol2_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol3_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) -
+               first(pmean(phi([x, t], sol3_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol4_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) -
+               first(pmean(phi([x, t], sol4_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol5_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) -
+               first(pmean(phi([x, t], sol5_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+
+p1 = plot(xs, u_predict, title = "predict")
+p2 = plot(xs, u_real, title = "analytic")
+p3 = plot(xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+phi = discretization_new.phi[1]
+xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+          for (d, dx) in zip(domains, [dx / 10, dt])]
+
+u_predict = [[first(pmean(phi([x, t], sol5_new.estimated_nn_params[1]))) for x in xs]
+             for t in ts]
+u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+diff_u = [[abs(u_analytic(x, t) -
+               first(pmean(phi([x, t], sol5_new.estimated_nn_params[1]))))
+           for x in xs]
+          for t in ts]
+p1 = plot(ts, xs, u_predict, title = "predict")
+p2 = plot(ts, xs, u_real, title = "analytic")
+p3 = plot(ts, xs, diff_u, title = "error")
+plot(p1, p2, p3)
+
+# MCMC chain analysis
+plot(sol1.original.mcmc_chain)
+plot(sol2.original.mcmc_chain)
+
+plot(sol0_new.original.mcmc_chain)
+plot(sol2_new.original.mcmc_chain)
+
+plot(sol1.original.mcmc_chain)
+meanplot(sol1.original.mcmc_chain)
+autocorplot(sol1.original.mcmc_chain)
+traceplot(sol1.original.mcmc_chain)
+
+plot(sol2.original.mcmc_chain)
+meanplot(sol2.original.mcmc_chain)
+autocorplot(sol2.original.mcmc_chain)
+traceplot(sol2.original.mcmc_chain)
+
+plot(sol0_new.original.mcmc_chain)
+meanplot(sol0_new.original.mcmc_chain)
+autocorplot(sol0_new.original.mcmc_chain)
+
+plot(sol2_new.original.mcmc_chain)
+meanplot(sol2_new.original.mcmc_chain)
+autocorplot(sol2_new.original.mcmc_chain)
+
+plot(sol3_new.original.mcmc_chain)
+meanplot(sol3_new.original.mcmc_chain)
+autocorplot(sol3_new.original.mcmc_chain)
\ No newline at end of file
diff --git a/test/bpinnexperimental.jl b/test/bpinnexperimental.jl
deleted file mode 100644
index a8a389ad44..0000000000
--- a/test/bpinnexperimental.jl
+++ /dev/null
@@ -1,140 +0,0 @@
-using Test, MCMCChains
-using ForwardDiff, Distributions, OrdinaryDiffEq
-using Flux, OptimizationOptimisers, AdvancedHMC, Lux
-using Statistics, Random, Functors, ComponentArrays
-using NeuralPDE, MonteCarloMeasurements
-
-Random.seed!(110)
-
-using NeuralPDE, Lux, Plots, OrdinaryDiffEq, Distributions, Random
-
-function lotka_volterra(u, p, t)
-    # Model parameters.
-    α, β, γ, δ = p
-    # Current state.
-    x, y = u
-
-    # Evaluate differential equations.
-    dx = (α - β * y) * x # prey
-    dy = (δ * x - γ) * y # predator
-
-    return [dx, dy]
-end
-
-# initial-value problem.
-u0 = [1.0, 1.0]
-p = [1.5, 1.0, 3.0, 1.0]
-tspan = (0.0, 4.0)
-prob = ODEProblem(lotka_volterra, u0, tspan, p)
-
-# Solve using OrdinaryDiffEq.jl solver
-dt = 0.2
-solution = solve(prob, Tsit5(); saveat = dt)
-
-times = solution.t
-u = hcat(solution.u...)
-x = u[1, :] + (u[1, :]) .* (0.3 .* randn(length(u[1, :])))
-y = u[2, :] + (u[2, :]) .* (0.3 .* randn(length(u[2, :])))
-dataset = [x, y, times]
-
-plot(times, x, label = "noisy x")
-plot!(times, y, label = "noisy y")
-plot!(solution, labels = ["x" "y"])
-
-chain = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),
-    Lux.Dense(6, 2))
-
-alg1 = BNNODE(chain;
-    dataset = dataset,
-    draw_samples = 1000,
-    l2std = [0.1, 0.1],
-    phystd = [0.1, 0.1],
-    priorsNNw = (0.0, 3.0),
-    param = [
-        Normal(1, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(0, 2)], progress = true)
-
-alg2 = BNNODE(chain;
-    dataset = dataset,
-    draw_samples = 1000,
-    l2std = [0.1, 0.1],
-    phystd = [0.1, 0.1],
-    priorsNNw = (0.0, 3.0),
-    param = [
-        Normal(1, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(0, 2)], estim_collocate = true, progress = true)
-
-@time sol_pestim1 = solve(prob, alg1; saveat = dt)
-@time sol_pestim2 = solve(prob, alg2; saveat = dt)
-plot(times, sol_pestim1.ensemblesol[1], label = "estimated x1")
-plot!(times, sol_pestim2.ensemblesol[1], label = "estimated x2")
-plot!(times, sol_pestim1.ensemblesol[2], label = "estimated y1")
-plot!(times, sol_pestim2.ensemblesol[2], label = "estimated y2")
-
-# comparing it with the original solution
-plot!(solution, labels = ["true x" "true y"])
-
-@show sol_pestim1.estimated_de_params
-@show sol_pestim2.estimated_de_params
-
-function fitz(u, p, t)
-    v, w = u[1], u[2]
-    a, b, τinv, l = p[1], p[2], p[3], p[4]
-
-    dv = v - 0.33 * v^3 - w + l
-    dw = τinv * (v + a - b * w)
-
-    return [dv, dw]
-end
-
-prob_ode_fitzhughnagumo = ODEProblem(
-    fitz, [1.0, 1.0], (0.0, 10.0), [0.7, 0.8, 1 / 12.5, 0.5])
-dt = 0.5
-sol = solve(prob_ode_fitzhughnagumo, Tsit5(), saveat = dt)
-
-sig = 0.20
-data = Array(sol)
-dataset = [data[1, :] .+ (sig .* rand(length(sol.t))),
-    data[2, :] .+ (sig .* rand(length(sol.t))), sol.t]
-priors = [Normal(0.5, 1.0), Normal(0.5, 1.0), Normal(0.0, 0.5), Normal(0.5, 1.0)]
-
-plot(sol.t, dataset[1], label = "noisy x")
-plot!(sol.t, dataset[2], label = "noisy y")
-plot!(sol, labels = ["x" "y"])
-
-chain = Lux.Chain(Lux.Dense(1, 10, tanh), Lux.Dense(10, 10, tanh),
-    Lux.Dense(10, 2))
-
-Adaptorkwargs = (Adaptor = AdvancedHMC.StanHMCAdaptor,
-    Metric = AdvancedHMC.DiagEuclideanMetric, targetacceptancerate = 0.8)
-alg1 = BNNODE(chain;
-dataset = dataset,
-draw_samples = 1000,
-l2std = [0.1, 0.1],
-phystd = [0.1, 0.1],
-priorsNNw = (0.01, 3.0),
-Adaptorkwargs = Adaptorkwargs,
-param = priors, progress = true)
-
-alg2 = BNNODE(chain;
-    dataset = dataset,
-    draw_samples = 1000,
-    l2std = [0.1, 0.1],
-    phystd = [0.1, 0.1],
-    priorsNNw = (0.01, 3.0),
-    Adaptorkwargs = Adaptorkwargs,
-    param = priors, estim_collocate = true, progress = true)
-
-@time sol_pestim3 = solve(prob_ode_fitzhughnagumo, alg1; saveat = dt)
-@time sol_pestim4 = solve(prob_ode_fitzhughnagumo, alg2; saveat = dt)
-plot!(sol.t, sol_pestim3.ensemblesol[1], label = "estimated x1")
-plot!(sol.t, sol_pestim4.ensemblesol[1], label = "estimated x2")
-plot!(sol.t, sol_pestim3.ensemblesol[2], label = "estimated y1")
-plot!(sol.t, sol_pestim4.ensemblesol[2], label = "estimated y2")
-
-@show sol_pestim3.estimated_de_params
-@show sol_pestim4.estimated_de_params

From 92368cf9f270cb5d18751710dd9638f10e24681c Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Sun, 12 May 2024 11:13:18 +0530
Subject: [PATCH 098/107] need PDE exp file to be concise

---
 src/NeuralPDE.jl               |    1 -
 test/BPINN_pde_experimental.jl | 1669 --------------------------------
 2 files changed, 1670 deletions(-)
 delete mode 100644 test/BPINN_pde_experimental.jl

diff --git a/src/NeuralPDE.jl b/src/NeuralPDE.jl
index f927af98aa..c0798c6270 100644
--- a/src/NeuralPDE.jl
+++ b/src/NeuralPDE.jl
@@ -89,7 +89,6 @@ include("BPINN_ode.jl")
 include("PDE_BPINN.jl")
 
 include("dgm.jl")
-include("collocated_estim.jl")
 
 export NNODE, NNDAE
 export BNNODE, ahmc_bayesian_pinn_ode, ahmc_bayesian_pinn_pde
diff --git a/test/BPINN_pde_experimental.jl b/test/BPINN_pde_experimental.jl
deleted file mode 100644
index a8f4a0341e..0000000000
--- a/test/BPINN_pde_experimental.jl
+++ /dev/null
@@ -1,1669 +0,0 @@
-using Test, MCMCChains, Lux, ModelingToolkit
-import ModelingToolkit: Interval, infimum, supremum
-using ForwardDiff, Distributions, OrdinaryDiffEq
-using AdvancedHMC, Statistics, Random, Functors
-using NeuralPDE, MonteCarloMeasurements
-using ComponentArrays, ModelingToolkit
-
-Random.seed!(100)
-
-# function required to use the new loss, creates a dicitonary of differntial operator terms
-function recur_expression(exp, Dict_differentials)
-    for in_exp in exp.args
-        if !(in_exp isa Expr)
-            # skip +,== symbols, characters etc
-            continue
-
-        elseif in_exp.args[1] isa ModelingToolkit.Differential
-            # first symbol of differential term
-            # Dict_differentials for masking differential terms
-            # and resubstituting differentials in equations after putting in interpolations
-            # temp = in_exp.args[end]
-            Dict_differentials[eval(in_exp)] = Symbolics.variable("diff_$(length(Dict_differentials) + 1)")
-            return
-        else
-            recur_expression(in_exp, Dict_differentials)
-        end
-    end
-end
-
-# experiments are here
-println("Example 3: Lotka Volterra with New parameter estimation")
-@parameters t α β γ δ
-@variables x(..) y(..)
-
-Dt = Differential(t)
-eqs = [Dt(x(t)) * α ~ x(t) - β * x(t) * y(t), Dt(y(t)) * δ ~ x(t) * y(t) - y(t) * γ]
-bcs = [x(0) ~ 1.0, y(0) ~ 1.0]
-domains = [t ∈ Interval(0.0, 7.0)]
-
-# Define the parameters' values
-# α, β, γ, δ = p
-
-# regular equations
-# dx = (1.5 - y) * x # prey
-# dy = (x - 3.0) * y # predator
-# p = [1.5, 1.0, 3.0, 1.0] non transformed values
-
-# transformed equations
-# dx*0.666 = (1 - 0.666 * y) * x # prey
-# dy*1.0 = (x - 3.0) * y # predator
-# p = [0.666, 0.666, 3.0, 1.0] transformed values (change is scale also ensured!)
-
-chainl = [
-    Lux.Chain(Lux.Dense(1, 5, sin), Lux.Dense(5, 5, sin), Lux.Dense(5, 1)),
-    Lux.Chain(Lux.Dense(1, 5, sin), Lux.Dense(5, 5, sin), Lux.Dense(5, 1))
-]
-
-initl, st = Lux.setup(Random.default_rng(), chainl[1])
-initl1, st1 = Lux.setup(Random.default_rng(), chainl[2])
-
-using NeuralPDE, Lux, OrdinaryDiffEq, Distributions, Random
-
-function lotka_volterra(u, p, t)
-    # Model parameters. 
-    α, β, γ, δ = p
-    # Current state.
-    x, y = u
-
-    # Evaluate differential equations.
-    dx = (α - β * y) * x # prey
-    dy = (δ * x - γ) * y # predator
-
-    return [dx, dy]
-end
-# initial-value problem.
-u0 = [1.0, 1.0]
-# p = [2/3, 2/3, 1/3.0, 1/3.0]
-p = [1.5, 1.0, 3.0, 1.0]
-tspan = (0.0, 7.0)
-prob = ODEProblem(lotka_volterra, u0, tspan, p)
-dt = 0.01
-solution = solve(prob, Tsit5(); saveat = dt)
-solution1 = solve(prob, Tsit5(); saveat = 0.02)
-
-function calculate_errors(approx_sol, solution_points)
-    # Check vector lengths match
-    if length(approx_sol) != length(solution_points)
-        error("Vectors must have the same length")
-    end
-
-    # Calculate errors
-    n = length(approx_sol)
-    errors = randn(n)
-    for i in 1:n
-        errors[i] = solution_points[i] - approx_sol[i]
-    end
-
-    # Calculate RMSE
-    rmse = sqrt(mean(errors .^ 2))
-
-    # Calculate MAE
-    mae = mean(abs.(errors))
-
-    # Calculate maximum absolute error
-    max_error = maximum(abs.(errors))
-
-    # Return dictionary with errors
-    return Dict(
-        "RMSE" => rmse,
-        "MAE" => mae,
-        "Max Abs Error" => max_error
-    )
-end
-u = hcat(solution1.u...)
-
-a1 = calculate_errors(pmean(sol6_1.ensemblesol[1]), u1[1, :])
-b1 = calculate_errors(pmean(sol6_1.ensemblesol[2]), u1[2, :])
-
-a = calculate_errors(pmean(sol6_2.ensemblesol[1]), u[1, :])
-b = calculate_errors(pmean(sol6_2.ensemblesol[2]), u[2, :])
-
-c = calculate_errors(pmean(sol6_L2_2.ensemblesol[1]), u[1, :])
-d = calculate_errors(pmean(sol6_L2_2.ensemblesol[2]), u[2, :])
-
-e = calculate_errors(pmean(sol6_L2_1.ensemblesol[1]), u[1, :])
-f = calculate_errors(pmean(sol6_L2_1.ensemblesol[2]), u[2, :])
-
-g = calculate_errors(pmean(sol6_L2.ensemblesol[1]), u[1, :])
-h = calculate_errors(pmean(sol6_L2.ensemblesol[2]), u[2, :])
-
-# function moving_average_smoothing(data::Vector{T}, window_size::Int) where {T}
-#     smoothed_data = similar(data, T, length(data))
-
-#     for i in 1:length(data)
-#         start_idx = max(1, i - window_size)
-#         end_idx = min(length(data), i + window_size)
-#         smoothed_data[i] = mean(data[start_idx:end_idx])
-#     end
-
-#     return smoothed_data'
-# end
-
-# Extract solution
-time = solution.t
-u = hcat(solution.u...)
-time1 = solution.t
-u_noisy = u .+ u .* (0.2 .* randn(size(u)))
-u_noisy0 = u .+ (3.0 .* rand(size(u)[1], size(u)[2]) .- 1.5)
-u_noisy1 = u .+ (0.8 .* randn(size(Array(solution))))
-u_noisy2 = u .+ (0.5 .* randn(size(Array(solution))))
-
-plot(time, u[1, :])
-plot!(time, u[2, :])
-scatter!(time1, u_noisy0[1, :])
-scatter!(time1, u_noisy0[2, :])
-scatter!(discretization_08_gaussian.dataset[1][1][:, 2],
-    discretization_08_gaussian.dataset[1][1][:, 1])
-scatter!(discretization_08_gaussian.dataset[1][2][:, 2],
-    discretization_08_gaussian.dataset[1][2][:, 1])
-
-scatter!(discretization_05_gaussian.dataset[1][1][:, 2],
-    discretization_05_gaussian.dataset[1][1][:, 1])
-scatter!(discretization_05_gaussian.dataset[1][2][:, 2],
-    discretization_05_gaussian.dataset[1][2][:, 1])
-# discretization_05_gaussian.dataset[1][1][:,2]
-# window_size = 5
-# smoothed_datasets = [moving_average_smoothing(u1[i, :], window_size)
-#                      for i in 1:length(solution.u[1])]
-# u2 = vcat(smoothed_datasets[1], smoothed_datasets[2])
-
-# Randomly select some points from the solution
-num_points = 100  # Number of points to select
-selected_indices = rand(1:size(u_noisy1, 2), num_points)
-upoints = [u_noisy1[:, i] for i in selected_indices]
-timepoints = [time[i] for i in selected_indices]
-temp = hcat(upoints...)
-dataset = [hcat(temp[i, :], timepoints) for i in 1:2]
-
-discretization_uniform = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
-    dataset = [dataset, nothing])
-discretization_08_gaussian = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
-    dataset = [dataset, nothing])
-discretization_05_gaussian = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
-    dataset = [dataset, nothing])
-
-discretization1 = BayesianPINN(chainl, GridTraining([0.01]), param_estim = true,
-    dataset = [dataset, nothing])
-
-scatter!(discretization.dataset[1][1][:, 2], discretization.dataset[1][1][:, 1])
-scatter!(discretization.dataset[1][2][:, 2], discretization.dataset[1][2][:, 1])
-
-sol = solve(prob, Tsit5(); saveat = 0.1)
-odedata = Array(sol) + 0.8 * randn(size(Array(sol)))
-
-@named pde_system = PDESystem(eqs,
-    bcs,
-    domains,
-    [t],
-    [x(t), y(t)],
-    [α, β, γ, δ],
-    defaults = Dict([α => 2, β => 2, γ => 2, δ => 2]))
-
-# creating dictionary for masking equations
-eqs = pde_system.eqs
-Dict_differentials = Dict()
-exps = toexpr.(eqs)
-nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-sol3 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_uniform = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_uniform;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_08_gaussian = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_08_gaussian;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_05_gaussian = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_05_gaussian;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-# more iterations for above
-sol3_100_uniform_1000 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_uniform;
-    draw_samples = 1000,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_08_gaussian_1000 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_08_gaussian;
-    draw_samples = 1000,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_05_gaussian_1000 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_05_gaussian;
-    draw_samples = 1000,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-# more iterations for above + strict BC
-sol3_100_uniform_1000_bc = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_uniform;
-    draw_samples = 1000,
-    bcstd = [0.1, 0.1],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_08_gaussian_1000_bc = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_08_gaussian;
-    draw_samples = 1000,
-    bcstd = [0.1, 0.1],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_08_gaussian_1000_bc_hard = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_08_gaussian;
-    draw_samples = 1000,
-    bcstd = [0.05, 0.05],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_05_gaussian_1000_bc = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_05_gaussian;
-    draw_samples = 1000,
-    bcstd = [0.1, 0.1],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol3_100_08_gaussian_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_08_gaussian;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol3_100_05_gaussian_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_05_gaussian;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol4 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true
-)
-
-sol4_0 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true
-)
-
-sol5_00 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol5_0 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.2, 0.2],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol5 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-# 70 points in dataset
-sol6 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.2, 0.2],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-# SOL6_1 VS SOL6_L2
-sol6_1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol6_2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol6_2_L2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol6_3 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol6_4 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.2, 0.2],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol6_L2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.2, 0.2],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol6_L2_1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-sol6_L2_2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization1;
-    draw_samples = 700,
-    bcstd = [0.05, 0.05],
-    phystd = [0.2, 0.2], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2),
-        Normal(2, 2)
-    ], progress = true)
-
-# 50 datapoint 0-5 sol5 vs sol4
-# julia> sol4.estimated_de_params
-# 4-element Vector{Particles{Float64, 234}}:
-#  0.549 ± 0.0058
-#  0.71 ± 0.0042
-#  0.408 ± 0.0063
-#  0.355 ± 0.0015
-
-# julia> sol5.estimated_de_params
-# 4-element Vector{Particles{Float64, 234}}:
-#  0.604 ± 0.0052
-#  0.702 ± 0.0034
-#  0.346 ± 0.0037
-#  0.335 ± 0.0013
-
-# 100 datapoint 0-5 sol5_2 vs sol3
-# julia> sol3.estimated_de_params
-# 4-element Vector{Particles{Float64, 234}}:
-#  0.598 ± 0.0037
-#  0.711 ± 0.0027
-#  0.399 ± 0.0032
-#  0.333 ± 0.0011
-
-# julia> sol5_2.estimated_de_params
-# 4-element Vector{Particles{Float64, 234}}:
-#  0.604 ± 0.0035
-#  0.686 ± 0.0026
-#  0.395 ± 0.0029
-#  0.328 ± 0.00095
-
-# timespan for full dataset (0-8)
-sol6 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1, 2),
-        Normal(1, 1),
-        Normal(1, 2),
-        Normal(1, 1)
-    ], progress = true)
-
-sol5_3 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1, 2),
-        Normal(1, 1),
-        Normal(1, 2),
-        Normal(1, 1)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol5_4 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.2, 0.2],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1, 2),
-        Normal(1, 1),
-        Normal(1, 2),
-        Normal(1, 1)
-    ], Dict_differentials = Dict_differentials, progress = true
-)
-
-sol5_5 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1, 2),
-        Normal(1, 1),
-        Normal(1, 2),
-        Normal(1, 1)
-    ], progress = true
-)
-
-sol7 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(1, 2),
-        Normal(1, 1),
-        Normal(1, 2),
-        Normal(1, 1)
-    ], Dict_differentials = Dict_differentials, progress = true)
-
-sol5_5_1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], progress = true
-)
-
-sol7_1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.05, 0.05],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true)
-
-sol7_2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.1, 0.1],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true)
-
-sol7_3 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.2, 0.2],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true)
-
-sol7_4 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 700,
-    bcstd = [0.1, 0.1],
-    phystd = [0.1, 0.1], l2std = [0.1, 0.1],
-    priorsNNw = (0.0, 5.0),
-    phystdnew = [0.3, 0.3],
-    saveats = [1 / 50.0],
-    param = [
-        Normal(2, 2),
-        Normal(2, 1),
-        Normal(2, 2),
-        Normal(2, 1)
-    ], Dict_differentials = Dict_differentials, progress = true)
-
-using Plots, StatsPlots
-plotly()
-plot(time, u[1, :])
-plot!(time, u[2, :])
-scatter!(time, u_noisy[1, :])
-scatter!(time, u_noisy[2, :])
-scatter!(discretization.dataset[1][1][:, 2], discretization.dataset[1][1][:, 1])
-scatter!(discretization.dataset[1][2][:, 2], discretization.dataset[1][2][:, 1])
-
-scatter!(discretization1.dataset[1][1][:, 2],
-    discretization1.dataset[1][1][:, 1], legend = nothing)
-scatter!(discretization1.dataset[1][2][:, 2], discretization1.dataset[1][2][:, 1])
-
-# plot28(sol4 seems better vs sol3 plots, params seems similar)
-plot!(sol3.timepoints[1]', sol3.ensemblesol[1])
-plot!(sol3.timepoints[2]', sol3.ensemblesol[2])
-plot!(sol3_0.timepoints[1]', sol3_0.ensemblesol[1])
-plot!(sol3_0.timepoints[2]', sol3_0.ensemblesol[2])
-
-plot!(sol4.timepoints[1]', sol4.ensemblesol[1])
-plot!(sol4.timepoints[2]', sol4.ensemblesol[2])
-plot!(sol4_0.timepoints[1]', sol4_0.ensemblesol[1])
-plot!(sol4_0.timepoints[2]', sol4_0.ensemblesol[2])
-
-plot!(sol4_2.timepoints[1]', sol4_2.ensemblesol[1], legend = nothing)
-plot!(sol4_2.timepoints[2]', sol4_2.ensemblesol[2])
-plot!(sol5_2.timepoints[1]', sol5_2.ensemblesol[1], legend = nothing)
-plot!(sol5_2.timepoints[2]', sol5_2.ensemblesol[2])
-
-plot!(sol4_3.timepoints[1]', sol4_3.ensemblesol[1], legend = nothing)
-plot!(sol4_3.timepoints[2]', sol4_3.ensemblesol[2])
-plot!(sol5_3.timepoints[1]', sol5_3.ensemblesol[1])
-plot!(sol5_3.timepoints[2]', sol5_3.ensemblesol[2])
-plot!(sol5_4.timepoints[1]', sol5_4.ensemblesol[1], legend = nothing)
-plot!(sol5_4.timepoints[2]', sol5_4.ensemblesol[2])
-
-# plot 36 sol4 vs sol5(params sol4 better, but plots sol5 "looks" better),plot 44(sol5 better than sol6 overall)
-plot!(sol5.timepoints[1]', sol5.ensemblesol[1], legend = nothing)
-plot!(sol5.timepoints[2]', sol5.ensemblesol[2])
-
-plot!(sol5_0.timepoints[1]', sol5_0.ensemblesol[1], legend = nothing)
-plot!(sol5_0.timepoints[2]', sol5_0.ensemblesol[2])
-
-plot!(sol5_00.timepoints[1]', sol5_00.ensemblesol[1], legend = nothing)
-plot!(sol5_00.timepoints[2]', sol5_00.ensemblesol[2])
-
-plot!(sol6.timepoints[1]', sol6.ensemblesol[1])
-plot!(sol6.timepoints[2]', sol6.ensemblesol[2])
-plot!(sol6_L2.timepoints[1]', sol6_L2.ensemblesol[1])
-plot!(sol6_L2.timepoints[2]', sol6_L2.ensemblesol[2])
-
-plot!(sol6_L2_1.timepoints[1]', sol6_L2_1.ensemblesol[1])
-plot!(sol6_L2_1.timepoints[2]', sol6_L2_1.ensemblesol[2])
-
-plot!(sol6_L2_2.timepoints[1]', sol6_L2_2.ensemblesol[1])
-plot!(sol6_L2_2.timepoints[2]', sol6_L2_2.ensemblesol[2])
-
-plot!(sol6_1.timepoints[1]', sol6_1.ensemblesol[1])
-plot!(sol6_1.timepoints[2]', sol6_1.ensemblesol[2])
-plot!(sol6_2.timepoints[1]', sol6_2.ensemblesol[1])
-plot!(sol6_2.timepoints[2]', sol6_2.ensemblesol[2], legend = nothing)
-plot!(sol6_2_L2.timepoints[1]', sol6_2_L2.ensemblesol[1])
-plot!(sol6_2_L2.timepoints[2]', sol6_2_L2.ensemblesol[2], legend = nothing)
-
-# plot52 sol7 vs sol5(sol5 overall better plots, params?)
-plot!(sol7.timepoints[1]', sol7.ensemblesol[1])
-plot!(sol7.timepoints[2]', sol7.ensemblesol[2])
-
-# sol8,sol8_2,sol9,sol9_2 bad
-plot!(sol8.timepoints[1]', sol8.ensemblesol[1])
-plot!(sol8.timepoints[2]', sol8.ensemblesol[2])
-plot!(sol8_2.timepoints[1]', sol8_2.ensemblesol[1])
-plot!(sol8_2.timepoints[2]', sol8_2.ensemblesol[2])
-
-plot!(sol9.timepoints[1]', sol9.ensemblesol[1])
-plot!(sol9.timepoints[2]', sol9.ensemblesol[2])
-plot!(sol9_2.timepoints[1]', sol9_2.ensemblesol[1])
-plot!(sol9_2.timepoints[2]', sol9_2.ensemblesol[2])
-
-plot!(sol5_5.timepoints[1]', sol5_5.ensemblesol[1])
-plot!(sol5_5.timepoints[2]', sol5_5.ensemblesol[2], legend = nothing)
-
-plot!(sol5_5_1.timepoints[1]', sol5_5_1.ensemblesol[1])
-plot!(sol5_5_1.timepoints[2]', sol5_5_1.ensemblesol[2], legend = nothing)
-plot!(sol7_1.timepoints[1]', sol7_1.ensemblesol[1])
-plot!(sol7_1.timepoints[2]', sol7_1.ensemblesol[2])
-
-plot!(sol7_4.timepoints[1]', sol7_4.ensemblesol[1])
-plot!(sol7_4.timepoints[2]', sol7_4.ensemblesol[2])
-
-plot!(sol5_2_1.timepoints[1]', sol5_2_1.ensemblesol[1], legend = nothing)
-plot!(sol5_2_1.timepoints[2]', sol5_2_1.ensemblesol[2])
-plot!(sol5_2_2.timepoints[1]', sol5_2_2.ensemblesol[1], legend = nothing)
-plot!(sol5_2_2.timepoints[2]', sol5_2_2.ensemblesol[2])
-
-plot!(sol5_0.timepoints[1]', sol5_0.ensemblesol[1])
-plot!(sol5_0.timepoints[2]', sol5_0.ensemblesol[2], legend = nothing)
-
-plot!(sol5_00.timepoints[1]', sol5_00.ensemblesol[1], legend = nothing)
-plot!(sol5_00.timepoints[2]', sol5_00.ensemblesol[2])
-
-plot!(sol3_0.timepoints[1]', sol3_0.ensemblesol[1])
-plot!(sol3_0.timepoints[2]', sol3_0.ensemblesol[2], legend = nothing)
-plot!(sol4_0.timepoints[1]', sol4_0.ensemblesol[1])
-plot!(sol4_0.timepoints[2]', sol4_0.ensemblesol[2], legend = nothing)
-
-plot!(sol3_100_05_gaussian.timepoints[1]', sol3_100_05_gaussian.ensemblesol[1])
-plot!(sol3_100_05_gaussian.timepoints[2]',
-    sol3_100_05_gaussian.ensemblesol[2], legend = nothing)
-
-plot!(sol3_100_05_gaussian_new.timepoints[1]', sol3_100_05_gaussian_new.ensemblesol[1])
-plot!(sol3_100_05_gaussian_new.timepoints[2]', sol3_100_05_gaussian_new.ensemblesol[2])
-
-plot!(sol3_100_08_gaussian.timepoints[1]', sol3_100_08_gaussian.ensemblesol[1])
-plot!(sol3_100_08_gaussian.timepoints[2]', sol3_100_08_gaussian.ensemblesol[2])
-
-plot!(sol3_100_08_gaussian_new.timepoints[1]', sol3_100_08_gaussian_new.ensemblesol[1])
-plot!(sol3_100_08_gaussian_new.timepoints[2]',
-    sol3_100_08_gaussian_new.ensemblesol[2], legend = nothing)
-
-plot!(sol3_100_uniform.timepoints[1]', sol3_100_uniform.ensemblesol[1])
-plot!(sol3_100_uniform.timepoints[2]', sol3_100_uniform.ensemblesol[2])
-
-plot!(sol3_100_08_gaussian_1000.timepoints[1]', sol3_100_08_gaussian_1000.ensemblesol[1])
-plot!(sol3_100_08_gaussian_1000.timepoints[2]', sol3_100_08_gaussian_1000.ensemblesol[2])
-
-plot!(sol3_100_05_gaussian_1000.timepoints[1]', sol3_100_05_gaussian_1000.ensemblesol[1])
-plot!(sol3_100_05_gaussian_1000.timepoints[2]', sol3_100_05_gaussian_1000.ensemblesol[2])
-
-plot!(sol3_100_uniform_1000.timepoints[1]', sol3_100_uniform_1000.ensemblesol[1])
-plot!(sol3_100_uniform_1000.timepoints[2]', sol3_100_uniform_1000.ensemblesol[2])
-
-plot!(sol3_100_08_gaussian_1000_bc.timepoints[1]',
-    sol3_100_08_gaussian_1000_bc.ensemblesol[1])
-plot!(sol3_100_08_gaussian_1000_bc.timepoints[2]',
-    sol3_100_08_gaussian_1000_bc.ensemblesol[2])
-
-# test with lower number of points
-# consider full range dataset case
-# combination of all above
-
-# run 1 100 iters
-sol5.estimated_de_params
-sol6.estimated_de_params
-
-# run 2 200 iters
-sol5.estimated_de_params
-sol6.estimated_de_params
-
-# run 2 200 iters
-sol3.estimated_de_params
-sol4.estimated_de_params
-
-# p = [2/3, 2/3, 1/3, 1/3]
-sol3.estimated_de_params
-sol4.estimated_de_params
-
-@parameters t, p
-@variables u(..)
-
-Dt = Differential(t)
-eqs = Dt(u(t)) - cos(p * t) ~ 0
-bcs = [u(0) ~ 0.0]
-domains = [t ∈ Interval(0.0, 2.0)]
-
-chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
-initl, st = Lux.setup(Random.default_rng(), chainl)
-
-@named pde_system = PDESystem(eqs,
-    bcs,
-    domains,
-    [t],
-    [u(t)],
-    [p],
-    defaults = Dict([p => 4.0]))
-
-analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
-timepoints = collect(0.0:(1 / 100.0):2.0)
-u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
-u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
-dataset = [hcat(u1, timepoints)]
-
-discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
-    dataset = [dataset, nothing])
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 1500,
-    bcstd = [0.05],
-    phystd = [0.01], l2std = [0.01],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 50.0],
-    param = [LogNormal(4.0, 2)], progress = true)
-
-param = 2 * π
-ts = vec(sol1.timepoints[1])
-u_real = [analytic_sol_func1(0.0, t) for t in ts]
-u_predict = pmean(sol1.ensemblesol[1])
-
-@test u_predict≈u_real atol=0.1
-@test mean(u_predict .- u_real) < 0.01
-@test sol1.estimated_de_params[1]≈param atol=0.1
-sol1.estimated_de_params[1]
-
-eqs = pde_system.eqs
-Dict_differentials = Dict()
-exps = toexpr.(eqs)
-nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-sol2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 1500,
-    bcstd = [0.05],
-    phystd = [0.01], l2std = [0.02], phystdnew = [0.02],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 50.0],
-    param = [LogNormal(4.0, 2)],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-param = 2 * π
-ts_2 = vec(sol2.timepoints[1])
-u_real_2 = [analytic_sol_func1(0.0, t) for t in ts]
-u_predict_2 = pmean(sol2.ensemblesol[1])
-
-@test u_predict_2≈u_real_2 atol=0.1
-@test mean(u_predict_2 .- u_real_2) < 0.01
-@test sol2.estimated_de_params[1]≈param atol=0.1
-sol2.estimated_de_params[1]
-
-plot(ts_2, u_predict_2)
-plot!(ts_2, u_real_2)
-
-@parameters t, σ_
-@variables x(..), y(..), z(..)
-Dt = Differential(t)
-eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)),
-    Dt(y(t)) ~ x(t) * (28.0 - z(t)) - y(t),
-    Dt(z(t)) ~ x(t) * y(t) - 8 / 3 * z(t)]
-
-bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0]
-domains = [t ∈ Interval(0.0, 1.0)]
-
-input_ = length(domains)
-n = 7
-chain = [
-    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-        Lux.Dense(n, 1)),
-    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-        Lux.Dense(n, 1)),
-    Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh),
-        Lux.Dense(n, 1))
-]
-
-#Generate Data
-function lorenz!(du, u, p, t)
-    du[1] = 10.0 * (u[2] - u[1])
-    du[2] = u[1] * (28.0 - u[3]) - u[2]
-    du[3] = u[1] * u[2] - (8 / 3) * u[3]
-end
-
-u0 = [1.0; 0.0; 0.0]
-tspan = (0.0, 1.0)
-prob = ODEProblem(lorenz!, u0, tspan)
-sol = solve(prob, Tsit5(), dt = 0.01, saveat = 0.05)
-ts = sol.t
-us = hcat(sol.u...)
-us = us .+ ((0.05 .* randn(size(us))) .* us)
-ts_ = hcat(sol(ts).t...)[1, :]
-dataset = [hcat(us[i, :], ts_) for i in 1:3]
-
-discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true,
-    dataset = [dataset, nothing])
-
-@named pde_system = PDESystem(eqs, bcs, domains,
-    [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]]))
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 100,
-    bcstd = [0.3, 0.3, 0.3],
-    phystd = [0.1, 0.1, 0.1],
-    l2std = [1, 1, 1],
-    priorsNNw = (0.0, 1.0),
-    saveats = [0.01],
-    param = [Normal(14.0, 2)], progress = true)
-
-idealp = 10.0
-p_ = sol1.estimated_de_params[1]
-@test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1]
-# @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2]
-
-@parameters x y
-@variables u(..)
-Dxx = Differential(x)^2
-Dyy = Differential(y)^2
-
-# 2D PDE
-eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y)
-
-# Boundary conditions
-bcs = [u(0, y) ~ 0.0, u(1, y) ~ 0.0,
-    u(x, 0) ~ 0.0, u(x, 1) ~ 0.0]
-
-# Space and time domains
-domains = [x ∈ Interval(0.0, 1.0),
-    y ∈ Interval(0.0, 1.0)]
-
-# Neural network
-dim = 2 # number of dimensions
-chain = Lux.Chain(Lux.Dense(dim, 9, Lux.σ), Lux.Dense(9, 9, Lux.σ), Lux.Dense(9, 1))
-
-# Discretization
-dx = 0.04
-discretization = BayesianPINN([chain], GridTraining(dx), dataset = [[dataset], nothing])
-
-@named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
-
-eqs = pde_system.eqs
-Dict_differentials = Dict()
-exps = toexpr.(eqs)
-nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 5,
-    bcstd = [0.01, 0.01, 0.01, 0.01],
-    phystd = [0.005],
-    priorsNNw = (0.0, 2.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-xs = sol1.timepoints[1]
-sol1.ensemblesol[1]
-analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2)
-
-dataset = hcat(u_real, xs')
-u_predict = pmean(sol1.ensemblesol[1])
-u_real = [analytic_sol_func(xs[:, i][1], xs[:, i][2]) for i in 1:length(xs[1, :])]
-@test u_predict≈u_real atol=0.8
-
-# KS EQUATION
-using NeuralPDE, Flux, Lux, ModelingToolkit, LinearAlgebra, AdvancedHMC
-import ModelingToolkit: Interval, infimum, supremum, Distributions
-using Plots, MonteCarloMeasurements, StatsPlots
-# plotly()
-
-@parameters x, t, α
-@variables u(..)
-Dt = Differential(t)
-Dx = Differential(x)
-Dx2 = Differential(x)^2
-Dx3 = Differential(x)^3
-Dx4 = Differential(x)^4
-
-# α = 1
-β = 4
-γ = 1
-eq = Dt(u(x, t)) + u(x, t) * Dx(u(x, t)) + α * Dx2(u(x, t)) + β * Dx3(u(x, t)) + γ * Dx4(u(x, t)) ~ 0
-
-u_analytic(x, t; z = -x / 2 + t) = 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
-du(x, t; z = -x / 2 + t) = 15 / 2 * (tanh(z) + 1) * (3 * tanh(z) - 1) * sech(z)^2
-
-bcs = [u(x, 0) ~ u_analytic(x, 0),
-    u(-10, t) ~ u_analytic(-10, t),
-    u(10, t) ~ u_analytic(10, t),
-    Dx(u(-10, t)) ~ du(-10, t),
-    Dx(u(10, t)) ~ du(10, t)]
-
-# Space and time domains
-domains = [x ∈ Interval(-10.0, 10.0),
-    t ∈ Interval(0.0, 1.0)]
-
-# Discretization
-dx = 0.4;
-dt = 0.2;
-
-# Function to compute analytical solution at a specific point (x, t)
-function u_analytic_point(x, t)
-    z = -x / 2 + t
-    return 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
-end
-
-# Function to generate the dataset matrix
-function generate_dataset_matrix(domains, dx, dt, xlim, tlim)
-    x_values = xlim[1]:dx:xlim[2]
-    t_values = tlim[1]:dt:tlim[2]
-
-    dataset = []
-
-    for t in t_values
-        for x in x_values
-            u_value = u_analytic_point(x, t)
-            push!(dataset, [u_value, x, t])
-        end
-    end
-
-    return vcat([data' for data in dataset]...)
-end
-
-datasetpde = [generate_dataset_matrix(domains, dx, dt, [-10, 10], [0.0, 1.0])]
-datasetpde_new = [generate_dataset_matrix(domains, dx, dt, [-10, 0], [0.0, 1.0])]
-
-# noise to dataset
-noisydataset = deepcopy(datasetpde)
-noisydataset[1][:, 1] = noisydataset[1][:, 1] .+ (randn(size(noisydataset[1][:, 1])) .* 0.8)
-
-noisydataset_new = deepcopy(datasetpde_new)
-noisydataset_new[1][:, 1] = noisydataset_new[1][:, 1] .+
-                            (randn(size(noisydataset_new[1][:, 1])) .* 0.8)
-
-plot(datasetpde[1][:, 2], datasetpde[1][:, 1], title = "Dataset from Analytical Solution")
-scatter!(noisydataset[1][:, 2], noisydataset[1][:, 1])
-
-plot(datasetpde[1][:, 2], datasetpde[1][:, 3], datasetpde[1][:, 1],
-    title = "Dataset from Analytical Solution")
-scatter!(noisydataset[1][:, 2], noisydataset[1][:, 3], noisydataset[1][:, 1])
-
-plot(datasetpde_new[1][:, 2], datasetpde_new[1][:, 1],
-    title = "Dataset from Analytical Solution")
-scatter!(noisydataset_new[1][:, 2], noisydataset_new[1][:, 1])
-
-plot(datasetpde_new[1][:, 2], datasetpde_new[1][:, 3],
-    datasetpde_new[1][:, 1], title = "Dataset from Analytical Solution")
-scatter!(noisydataset_new[1][:, 2], noisydataset_new[1][:, 3], noisydataset_new[1][:, 1])
-
-noise_std = 1.4
-original_data = datasetpde[1][:, 1]
-original_std = std(original_data)
-ratio = noise_std / original_std
-
-# Neural network
-chain = Lux.Chain(Lux.Dense(2, 8, Lux.tanh),
-    Lux.Dense(8, 8, Lux.tanh),
-    Lux.Dense(8, 1))
-
-discretization = NeuralPDE.BayesianPINN([chain],
-    GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset, nothing])
-
-discretization_new = NeuralPDE.BayesianPINN([chain],
-    GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset_new, nothing])
-
-@named pde_system = PDESystem(eq,
-    bcs,
-    domains,
-    [x, t],
-    [u(x, t)],
-    [α],
-    defaults = Dict([α => 2.0]))
-
-eqs = pde_system.eqs
-Dict_differentials = Dict()
-exps = toexpr.(eqs)
-nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 80, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.5],
-    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-sol1_1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 90, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.7],
-    phystd = [0.7], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-sol2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 80, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    progress = true)
-
-sol3 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 100, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-    phystd = [0.7], l2std = [0.15], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 3.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    progress = true)
-
-sol4 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 80, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-    phystd = [0.7], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    progress = true)
-
-phi = discretization.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol1.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-phi = discretization.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol1_1.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol1_1.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-phi = discretization.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol2.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol2.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-phi = discretization.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol3.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol3.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-phi = discretization.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol4.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) - first(pmean(phi([x, t], sol4.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-sol0_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 110, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.2],
-    phystd = [0.2], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-julia > sol5_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 170, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    progress = true)
-
-sol1_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 110, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.2],
-    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-sol1_1_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 110, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.5],
-    phystd = [0.5], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-sol1_2_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 150, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.1],
-    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 3.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-sol1_3_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 150, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.2],
-    phystd = [0.3], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 3.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    Dict_differentials = Dict_differentials,
-    progress = true)
-
-sol2_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 140, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-    phystd = [0.2], l2std = [0.2], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    progress = true)
-
-sol3_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 140, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    progress = true)
-
-sol4_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 160, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    progress = true)
-
-sol5_new = ahmc_bayesian_pinn_pde(pde_system,
-    discretization_new;
-    draw_samples = 170, Kernel = AdvancedHMC.NUTS(0.8),
-    bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
-    phystd = [0.2], l2std = [0.1], param = [Distributions.Normal(2.0, 2)],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 100.0, 1 / 100.0],
-    progress = true)
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol0_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) -
-               first(pmean(phi([x, t], sol0_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol1_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) -
-               first(pmean(phi([x, t], sol1_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol1_1_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) -
-               first(pmean(phi([x, t], sol1_1_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol1_2_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) -
-               first(pmean(phi([x, t], sol1_2_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol1_3_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) -
-               first(pmean(phi([x, t], sol1_3_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol2_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) -
-               first(pmean(phi([x, t], sol2_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol3_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) -
-               first(pmean(phi([x, t], sol3_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol4_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) -
-               first(pmean(phi([x, t], sol4_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol5_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) -
-               first(pmean(phi([x, t], sol5_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-
-p1 = plot(xs, u_predict, title = "predict")
-p2 = plot(xs, u_real, title = "analytic")
-p3 = plot(xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-phi = discretization_new.phi[1]
-xs, ts = [infimum(d.domain):dx:supremum(d.domain)
-          for (d, dx) in zip(domains, [dx / 10, dt])]
-
-u_predict = [[first(pmean(phi([x, t], sol5_new.estimated_nn_params[1]))) for x in xs]
-             for t in ts]
-u_real = [[u_analytic(x, t) for x in xs] for t in ts]
-diff_u = [[abs(u_analytic(x, t) -
-               first(pmean(phi([x, t], sol5_new.estimated_nn_params[1]))))
-           for x in xs]
-          for t in ts]
-p1 = plot(ts, xs, u_predict, title = "predict")
-p2 = plot(ts, xs, u_real, title = "analytic")
-p3 = plot(ts, xs, diff_u, title = "error")
-plot(p1, p2, p3)
-
-# MCMC chain analysis
-plot(sol1.original.mcmc_chain)
-plot(sol2.original.mcmc_chain)
-
-plot(sol0_new.original.mcmc_chain)
-plot(sol2_new.original.mcmc_chain)
-
-plot(sol1.original.mcmc_chain)
-meanplot(sol1.original.mcmc_chain)
-autocorplot(sol1.original.mcmc_chain)
-traceplot(sol1.original.mcmc_chain)
-
-plot(sol2.original.mcmc_chain)
-meanplot(sol2.original.mcmc_chain)
-autocorplot(sol2.original.mcmc_chain)
-traceplot(sol2.original.mcmc_chain)
-
-plot(sol0_new.original.mcmc_chain)
-meanplot(sol0_new.original.mcmc_chain)
-autocorplot(sol0_new.original.mcmc_chain)
-
-plot(sol2_new.original.mcmc_chain)
-meanplot(sol2_new.original.mcmc_chain)
-autocorplot(sol2_new.original.mcmc_chain)
-
-plot(sol3_new.original.mcmc_chain)
-meanplot(sol3_new.original.mcmc_chain)
-autocorplot(sol3_new.original.mcmc_chain)
\ No newline at end of file

From fc2b3078c5840e761fd44b040e269ccdcac62f57 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Wed, 2 Oct 2024 19:12:45 +0530
Subject: [PATCH 099/107] corrections in rebase

---
 test/BPINN_Tests.jl | 163 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 163 insertions(+)

diff --git a/test/BPINN_Tests.jl b/test/BPINN_Tests.jl
index 6a32c560f0..d53553f29e 100644
--- a/test/BPINN_Tests.jl
+++ b/test/BPINN_Tests.jl
@@ -243,3 +243,166 @@ end
     alg = BNNODE(chainflux, draw_samples = 2500)
     @test alg.chain isa Lux.AbstractExplicitLayer
 end
+
+@testset "Example 3 but with the new objective" begin
+    linear = (u, p, t) -> u / p + exp(t / p) * cos(t)
+    tspan = (0.0, 10.0)
+    u0 = 0.0
+    p = -5.0
+    prob = ODEProblem(linear, u0, tspan, p)
+    linear_analytic = (u0, p, t) -> exp(t / p) * (u0 + sin(t))
+
+    # SOLUTION AND CREATE DATASET
+    sol = solve(prob, Tsit5(); saveat = 0.1)
+    u = sol.u
+    time = sol.t
+    x̂ = u .+ (0.3 .* randn(size(u)))
+    dataset = [x̂, time]
+    physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)]
+
+    # separate set of points for testing the solve() call (it uses saveat 1/50 hence here length 501)
+    time1 = vec(collect(Float64, range(tspan[1], tspan[2], length = 501)))
+    physsol2 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)]
+
+    chainlux12 = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh), Lux.Dense(6, 1))
+    θinit, st = Lux.setup(Random.default_rng(), chainlux12)
+
+    fh_mcmc_chainlux12, fhsampleslux12, fhstatslux12 = ahmc_bayesian_pinn_ode(
+        prob, chainlux12,
+        dataset = dataset,
+        draw_samples = 1000,
+        l2std = [0.1],
+        phystd = [0.03],
+        priorsNNw = (0.0,
+            1.0),
+        param = [
+            Normal(-7, 3)
+        ])
+
+    fh_mcmc_chainlux22, fhsampleslux22, fhstatslux22 = ahmc_bayesian_pinn_ode(
+        prob, chainlux12,
+        dataset = dataset,
+        draw_samples = 1000,
+        l2std = [0.1],
+        phystd = [0.03],
+        priorsNNw = (0.0,
+            1.0),
+        param = [
+            Normal(-7, 3)
+        ], estim_collocate = true)
+
+    alg = BNNODE(chainlux12,
+        dataset = dataset,
+        draw_samples = 1000,
+        l2std = [0.1],
+        phystd = [0.03],
+        priorsNNw = (0.0,
+            1.0),
+        param = [
+            Normal(-7, 3)
+        ], estim_collocate = true)
+
+    sol3lux_pestim = solve(prob, alg)
+
+    # testing timepoints
+    t = sol.t
+    #------------------------------ ahmc_bayesian_pinn_ode() call
+    # Mean of last 500 sampled parameter's curves(lux chains)[Ensemble predictions]
+    θ = [vector_to_parameters(fhsampleslux12[i][1:(end - 1)], θinit)
+         for i in 750:length(fhsampleslux12)]
+    luxar = [chainlux12(t', θ[i], st)[1] for i in eachindex(θ)]
+    luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
+    meanscurve2_1 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
+
+    θ = [vector_to_parameters(fhsampleslux22[i][1:(end - 1)], θinit)
+         for i in 750:length(fhsampleslux22)]
+    luxar = [chainlux12(t', θ[i], st)[1] for i in eachindex(θ)]
+    luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
+    meanscurve2_2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
+
+    @test mean(abs.(sol.u .- meanscurve2_2)) < 6e-2
+    @test mean(abs.(physsol1 .- meanscurve2_2)) < 6e-2
+    @test mean(abs.(sol.u .- meanscurve2_1)) > mean(abs.(sol.u .- meanscurve2_2))
+    @test mean(abs.(physsol1 .- meanscurve2_1)) > mean(abs.(physsol1 .- meanscurve2_2))
+
+    # estimated parameters(lux chain)
+    param2 = mean(i[62] for i in fhsampleslux22[750:length(fhsampleslux22)])
+    @test abs(param2 - p) < abs(0.25 * p)
+
+    param1 = mean(i[62] for i in fhsampleslux12[750:length(fhsampleslux12)])
+    @test abs(param1 - p) < abs(0.75 * p)
+    @test abs(param2 - p) < abs(param1 - p)
+
+    #-------------------------- solve() call 
+    # (lux chain)
+    @test mean(abs.(physsol2 .- pmean(sol3lux_pestim.ensemblesol[1]))) < 0.1
+    # estimated parameters(lux chain)
+    param3 = sol3lux_pestim.estimated_de_params[1]
+    @test abs(param3 - p) < abs(0.2 * p)
+end
+
+@testset "Example 4 - improvement" begin
+    function lotka_volterra(u, p, t)
+        # Model parameters.
+        α, β, γ, δ = p
+        # Current state.
+        x, y = u
+
+        # Evaluate differential equations.
+        dx = (α - β * y) * x # prey
+        dy = (δ * x - γ) * y # predator
+
+        return [dx, dy]
+    end
+
+    # initial-value problem.
+    u0 = [1.0, 1.0]
+    p = [1.5, 1.0, 3.0, 1.0]
+    tspan = (0.0, 4.0)
+    prob = ODEProblem(lotka_volterra, u0, tspan, p)
+
+    # Solve using OrdinaryDiffEq.jl solver
+    dt = 0.2
+    solution = solve(prob, Tsit5(); saveat = dt)
+
+    times = solution.t
+    u = hcat(solution.u...)
+    x = u[1, :] + (0.8 .* randn(length(u[1, :])))
+    y = u[2, :] + (0.8 .* randn(length(u[2, :])))
+    dataset = [x, y, times]
+
+    chain = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),
+        Lux.Dense(6, 2))
+
+    alg1 = BNNODE(chain;
+        dataset = dataset,
+        draw_samples = 1000,
+        l2std = [0.2, 0.2],
+        phystd = [0.1, 0.1],
+        priorsNNw = (0.0, 1.0),
+        param = [
+            Normal(2, 0.5),
+            Normal(2, 0.5),
+            Normal(2, 0.5),
+            Normal(2, 0.5)])
+
+    alg2 = BNNODE(chain;
+        dataset = dataset,
+        draw_samples = 1000,
+        l2std = [0.2, 0.2],
+        phystd = [0.1, 0.1],
+        priorsNNw = (0.0, 1.0),
+        param = [
+            Normal(2, 0.5),
+            Normal(2, 0.5),
+            Normal(2, 0.5),
+            Normal(2, 0.5)], estim_collocate = true)
+
+    @time sol_pestim1 = solve(prob, alg1; saveat = dt)
+    @time sol_pestim2 = solve(prob, alg2; saveat = dt)
+
+    unsafe_comparisons(true)
+    bitvec = abs.(p .- sol_pestim1.estimated_de_params) .>
+             abs.(p .- sol_pestim2.estimated_de_params)
+    @test bitvec == ones(size(bitvec))
+end
\ No newline at end of file

From fe904a08b97486dd57ee3c9e7d292a5b15d0f59f Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Fri, 11 Oct 2024 16:14:10 +0530
Subject: [PATCH 100/107] tests pass locally

---
 test/BPINN_PDEinvsol_tests.jl | 258 ++++++++++++++++++++--------------
 1 file changed, 151 insertions(+), 107 deletions(-)

diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index b71047cdca..24da3c7ffa 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -34,43 +34,7 @@ Random.seed!(100)
     u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
     dataset = [hcat(u1, timepoints)]
 
-    # TODO: correct implementations
-    # # checking all training strategies
-    # discretization = BayesianPINN([chainl], StochasticTraining(200), param_estim = true,
-    #     dataset = [dataset, nothing])
-
-    # ahmc_bayesian_pinn_pde(pde_system,
-    #     discretization;
-    #     draw_samples = 1500,
-    #     bcstd = [0.05],
-    #     phystd = [0.01], l2std = [0.01],
-    #     priorsNNw = (0.0, 1.0),
-    #     saveats = [1 / 50.0],
-    #     param = [LogNormal(6.0, 0.5)])
-
-    # discretization = BayesianPINN([chainl], QuasiRandomTraining(200), param_estim = true,
-    #     dataset = [dataset, nothing])
-
-    # ahmc_bayesian_pinn_pde(pde_system,
-    #     discretization;
-    #     draw_samples = 1500,
-    #     bcstd = [0.05],
-    #     phystd = [0.01], l2std = [0.01],
-    #     priorsNNw = (0.0, 1.0),
-    #     saveats = [1 / 50.0],
-    #     param = [LogNormal(6.0, 0.5)])
-
-    # discretization = BayesianPINN([chainl], QuadratureTraining(), param_estim = true,
-    #     dataset = [dataset, nothing])
-
-    # ahmc_bayesian_pinn_pde(pde_system,
-    #     discretization;
-    #     draw_samples = 1500,
-    #     bcstd = [0.05],
-    #     phystd = [0.01], l2std = [0.01],
-    #     priorsNNw = (0.0, 1.0),
-    #     saveats = [1 / 50.0],
-    #     param = [LogNormal(6.0, 0.5)])
+    # TODO: correct BPINN implementations for Training Strategies.
 
     discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
         dataset = [dataset, nothing])
@@ -174,73 +138,153 @@ function recur_expression(exp, Dict_differentials)
     end
 end
 
-println("Example 3: 2D Periodic System with New parameter estimation")
-@parameters t, p
-@variables u(..)
-
-Dt = Differential(t)
-eqs = Dt(u(t)) - cos(p * t) * u(t) ~ 0
-bcs = [u(0) ~ 0.0]
-domains = [t ∈ Interval(0.0, 2.0)]
-
-chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1))
-initl, st = Lux.setup(Random.default_rng(), chainl)
-
-@named pde_system = PDESystem(eqs,
-    bcs,
-    domains,
-    [t],
-    [u(t)],
-    [p],
-    defaults = Dict([p => 4.0]))
-
-analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π)
-timepoints = collect(0.0:(1 / 100.0):2.0)
-u1 = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints]
-u1 = u1 .+ (u1 .* 0.2) .* randn(size(u1))
-dataset = [hcat(u1, timepoints)]
-
-discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true,
-    dataset = [dataset, nothing])
-
-# creating dictionary for masking equations
-eqs = pde_system.eqs
-Dict_differentials = Dict()
-exps = toexpr.(eqs)
-nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
-
-sol1 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 1500,
-    bcstd = [0.05],
-    phystd = [0.01], l2std = [0.01], phystdnew = [0.05],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 50.0],
-    param = [LogNormal(6.0, 0.5)],
-    Dict_differentials = Dict_differentials)
-
-sol2 = ahmc_bayesian_pinn_pde(pde_system,
-    discretization;
-    draw_samples = 1500,
-    bcstd = [0.05],
-    phystd = [0.01], l2std = [0.01],
-    priorsNNw = (0.0, 1.0),
-    saveats = [1 / 50.0],
-    param = [LogNormal(6.0, 0.5)])
-
-param = 2 * π
-ts = vec(sol1.timepoints[1])
-u_real = [analytic_sol_func1(0.0, t) for t in ts]
-u_predict = pmean(sol1.ensemblesol[1])
-
-@test u_predict≈u_real atol=1.5
-@test mean(u_predict .- u_real) < 0.1
-@test sol1.estimated_de_params[1]≈param atol=param * 0.3
-
-ts = vec(sol2.timepoints[1])
-u_real = [analytic_sol_func1(0.0, t) for t in ts]
-u_predict = pmean(sol2.ensemblesol[1])
-
-@test u_predict≈u_real atol=1.5
-@test mean(u_predict .- u_real) < 0.1
-@test sol1.estimated_de_params[1]≈param atol=param * 0.3
\ No newline at end of file
+@testset "improvement in Solving Parametric Kuromo-Sivashinsky Equation" begin
+    @parameters x, t, α
+    @variables u(..)
+    Dt = Differential(t)
+    Dx = Differential(x)
+    Dx2 = Differential(x)^2
+    Dx3 = Differential(x)^3
+    Dx4 = Differential(x)^4
+
+    # α = 1 (KS equation to be parametric in a)
+    β = 4
+    γ = 1
+    eq = Dt(u(x, t)) + u(x, t) * Dx(u(x, t)) + α * Dx2(u(x, t)) + β * Dx3(u(x, t)) + γ * Dx4(u(x, t)) ~ 0
+
+    u_analytic(x, t; z = -x / 2 + t) = 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
+    du(x, t; z = -x / 2 + t) = 15 / 2 * (tanh(z) + 1) * (3 * tanh(z) - 1) * sech(z)^2
+
+    bcs = [u(x, 0) ~ u_analytic(x, 0),
+        u(-10, t) ~ u_analytic(-10, t),
+        u(10, t) ~ u_analytic(10, t),
+        Dx(u(-10, t)) ~ du(-10, t),
+        Dx(u(10, t)) ~ du(10, t)]
+
+    # Space and time domains
+    domains = [x ∈ Interval(-10.0, 10.0),
+        t ∈ Interval(0.0, 1.0)]
+
+    # Discretization
+    dx = 0.4
+    dt = 0.2
+
+    # Function to compute analytical solution at a specific point (x, t)
+    function u_analytic_point(x, t)
+        z = -x / 2 + t
+        return 11 + 15 * tanh(z) - 15 * tanh(z)^2 - 15 * tanh(z)^3
+    end
+
+    # Function to generate the dataset matrix
+    function generate_dataset_matrix(domains, dx, dt, xlim, tlim)
+        x_values = xlim[1]:dx:xlim[2]
+        t_values = tlim[1]:dt:tlim[2]
+
+        dataset = []
+
+        for t in t_values
+            for x in x_values
+                u_value = u_analytic_point(x, t)
+                push!(dataset, [u_value, x, t])
+            end
+        end
+
+        return vcat([data' for data in dataset]...)
+    end
+
+    # considering sparse dataset from half of x's domain
+    datasetpde_new = [generate_dataset_matrix(domains, dx, dt, [-10, 0], [0.0, 1.0])]
+
+    # Adding Gaussian noise with a 0.8 std
+    noisydataset_new = deepcopy(datasetpde_new)
+    noisydataset_new[1][:, 1] = noisydataset_new[1][:, 1] .+
+                                (randn(size(noisydataset_new[1][:, 1])) .* 0.8)
+
+    # Neural network
+    chain = Lux.Chain(Lux.Dense(2, 8, Lux.tanh),
+        Lux.Dense(8, 8, Lux.tanh),
+        Lux.Dense(8, 1))
+
+    # Discretization for old and new models
+    discretization = NeuralPDE.BayesianPINN([chain],
+        GridTraining([dx, dt]), param_estim = true, dataset = [noisydataset_new, nothing])
+
+    # let α default to 2.0
+    @named pde_system = PDESystem(eq,
+        bcs,
+        domains,
+        [x, t],
+        [u(x, t)],
+        [α],
+        defaults = Dict([α => 2.0]))
+
+    # neccesarry for loss function contruction (involves Operator masking) 
+    eqs = pde_system.eqs
+    Dict_differentials = Dict()
+    exps = toexpr.(eqs)
+    nullobj = [recur_expression(exp, Dict_differentials) for exp in exps]
+
+    # Dict_differentials is now ;
+    # Dict{Any, Any} with 5 entries:
+    #   Differential(x)(Differential(x)(u(x, t)))            => diff_5
+    #   Differential(x)(Differential(x)(Differential(x)(u(x… => diff_1
+    #   Differential(x)(Differential(x)(Differential(x)(Dif… => diff_2
+    #   Differential(x)(u(x, t))                             => diff_4
+    #   Differential(t)(u(x, t))                             => diff_3
+
+    # using HMC algorithm due to convergence, stability, time of training. (refer to mcmc chain plots)
+    # choice of std for objectives is very important
+    # pass in Dict_differentials, phystdnew arguments when using the new model
+
+    sol_new = ahmc_bayesian_pinn_pde(pde_system,
+        discretization;
+        draw_samples = 150,
+        bcstd = [0.1, 0.1, 0.1, 0.1, 0.1], phystdnew = [0.2],
+        phystd = [0.2], l2std = [0.5], param = [Distributions.Normal(2.0, 2)],
+        priorsNNw = (0.0, 1.0),
+        saveats = [1 / 100.0, 1 / 100.0],
+        Dict_differentials = Dict_differentials,
+        progress = true)
+
+    sol_old = ahmc_bayesian_pinn_pde(pde_system,
+        discretization;
+        draw_samples = 150,
+        bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
+        phystd = [0.2], l2std = [0.5], param = [Distributions.Normal(2.0, 2)],
+        priorsNNw = (0.0, 1.0),
+        saveats = [1 / 100.0, 1 / 100.0],
+        progress = true)
+
+    phi = discretization.phi[1]
+    xs, ts = [infimum(d.domain):dx:supremum(d.domain)
+              for (d, dx) in zip(domains, [dx / 10, dt])]
+    u_real = [[u_analytic(x, t) for x in xs] for t in ts]
+
+    u_predict_new = [[first(pmean(phi([x, t], sol_new.estimated_nn_params[1]))) for x in xs]
+                     for t in ts]
+
+    diff_u_new = [[abs(u_analytic(x, t) -
+                       first(pmean(phi([x, t], sol_new.estimated_nn_params[1]))))
+                   for x in xs]
+                  for t in ts]
+
+    u_predict_old = [[first(pmean(phi([x, t], sol_old.estimated_nn_params[1]))) for x in xs]
+                     for t in ts]
+    diff_u_old = [[abs(u_analytic(x, t) -
+                       first(pmean(phi([x, t], sol_old.estimated_nn_params[1]))))
+                   for x in xs]
+                  for t in ts]
+
+    @test all(all, [((diff_u_new[i]) .^ 2 .< 0.5) for i in 1:6]) == true
+    @test all(all, [((diff_u_old[i]) .^ 2 .< 0.5) for i in 1:6]) == false
+
+    MSE_new = [sum(abs2, diff_u_new[i]) for i in 1:6]
+    MSE_old = [sum(abs2, diff_u_old[i]) for i in 1:6]
+    @test (MSE_new .< MSE_old) == [1, 1, 1, 1, 1, 1]
+
+    param_new = sol_new.estimated_de_params[1]
+    param_old = sol_old.estimated_de_params[1]
+    α = 1
+    @test abs(param_new - α) < 0.2 * α
+    @test abs(param_new - α) < abs(param_old - α)
+end
\ No newline at end of file

From 74bc459e2102e87df961bf3ff5fd1d73babfe1d6 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Fri, 11 Oct 2024 16:20:19 +0530
Subject: [PATCH 101/107] spell checks, Statistics.jl vers

---
 test/BPINN_PDEinvsol_tests.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index 24da3c7ffa..09d56f646e 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -218,7 +218,7 @@ end
         [α],
         defaults = Dict([α => 2.0]))
 
-    # neccesarry for loss function contruction (involves Operator masking) 
+    # neccesarry for loss function construction (involves Operator masking) 
     eqs = pde_system.eqs
     Dict_differentials = Dict()
     exps = toexpr.(eqs)

From 323247ff34f56c64c440e9a7766ccd3adc87dbc9 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Sat, 12 Oct 2024 07:09:28 +0530
Subject: [PATCH 102/107] update tests

---
 src/PDE_BPINN.jl              |  2 +-
 test/BPINN_PDE_tests.jl       | 25 +++++++++++++------------
 test/BPINN_PDEinvsol_tests.jl |  6 ++----
 3 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/src/PDE_BPINN.jl b/src/PDE_BPINN.jl
index d0e36dd04f..9c283f1e00 100644
--- a/src/PDE_BPINN.jl
+++ b/src/PDE_BPINN.jl
@@ -306,7 +306,7 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
         end
     end
 
-    # [WIP] add overall functionality for BC dataset points
+    # [WIP] add overall functionality for BC dataset points (case of parametric BC)
     if ((dataset_bc isa Nothing) && (dataset_pde isa Nothing))
         dataset = nothing
     elseif dataset_bc isa Nothing
diff --git a/test/BPINN_PDE_tests.jl b/test/BPINN_PDE_tests.jl
index e543baea27..ae97ee3751 100644
--- a/test/BPINN_PDE_tests.jl
+++ b/test/BPINN_PDE_tests.jl
@@ -14,8 +14,7 @@
     eq = Dt(u(t)) - cospi(2t) ~ 0
     bcs = [u(0.0) ~ 0.0]
     domains = [t ∈ Interval(0.0, 2.0)]
-
-    chainl = Chain(Dense(1, 6, tanh), Dense(6, 1))
+    chainl = Lux.Chain(Lux.Dense(1, 5, tanh), Lux.Dense(5, 5, tanh), Lux.Dense(5, 1))
     initl, st = Lux.setup(Random.default_rng(), chainl)
     @named pde_system = PDESystem(eq, bcs, domains, [t], [u(t)])
 
@@ -24,10 +23,10 @@
 
     sol1 = ahmc_bayesian_pinn_pde(pde_system,
         discretization;
-        draw_samples = 2000,
-        bcstd = [0.02],
+        draw_samples = 250,
+        bcstd = [0.001],
         phystd = [0.01],
-        priorsNNw = (0.0, 10.0),
+        priorsNNw = (0.0, 1.0),
         saveats = [1 / 50.0])
 
     analytic_sol_func(u0, t) = u0 + sinpi(2t) / (2pi)
@@ -35,8 +34,8 @@
     u_real = [analytic_sol_func(0.0, t) for t in ts]
     u_predict = pmean(sol1.ensemblesol[1])
 
-    @test u_predict≈u_real atol=0.05
-    @test mean(u_predict .- u_real) < 1e-3
+    @test u_predict≈u_real atol=0.02
+    @test mean(abs.(u_predict .- u_real)) < 1e-3
 end
 
 @testitem "BPINN PDE II: 1D ODE" tags=[:pdebpinn] begin
@@ -182,10 +181,10 @@ end
 
     sol1 = ahmc_bayesian_pinn_pde(pde_system,
         discretization;
-        draw_samples = 200,
-        bcstd = [0.01, 0.01, 0.01, 0.01],
-        phystd = [0.005],
-        priorsNNw = (0.0, 2.0),
+        draw_samples = 400,
+        bcstd = [0.05, 0.05, 0.05, 0.05],
+        phystd = [0.05],
+        priorsNNw = (0.0, 1.0),
         saveats = [1 / 100.0, 1 / 100.0])
 
     xs = sol.timepoints[1]
@@ -193,7 +192,9 @@ end
 
     u_predict = pmean(sol.ensemblesol[1])
     u_real = [analytic_sol_func(xs[:, i][1], xs[:, i][2]) for i in 1:length(xs[1, :])]
-    @test u_predict≈u_real atol=0.8
+
+    @test sum(abs2.(u_predict .- u_real)) < 0.1
+    @test u_predict≈u_real atol=0.1
 end
 
 @testitem "BPINN PDE: Translating from Flux" tags=[:pdebpinn] begin
diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl
index 09d56f646e..817992c2bf 100644
--- a/test/BPINN_PDEinvsol_tests.jl
+++ b/test/BPINN_PDEinvsol_tests.jl
@@ -243,8 +243,7 @@ end
         phystd = [0.2], l2std = [0.5], param = [Distributions.Normal(2.0, 2)],
         priorsNNw = (0.0, 1.0),
         saveats = [1 / 100.0, 1 / 100.0],
-        Dict_differentials = Dict_differentials,
-        progress = true)
+        Dict_differentials = Dict_differentials)
 
     sol_old = ahmc_bayesian_pinn_pde(pde_system,
         discretization;
@@ -252,8 +251,7 @@ end
         bcstd = [0.1, 0.1, 0.1, 0.1, 0.1],
         phystd = [0.2], l2std = [0.5], param = [Distributions.Normal(2.0, 2)],
         priorsNNw = (0.0, 1.0),
-        saveats = [1 / 100.0, 1 / 100.0],
-        progress = true)
+        saveats = [1 / 100.0, 1 / 100.0])
 
     phi = discretization.phi[1]
     xs, ts = [infimum(d.domain):dx:supremum(d.domain)

From e140bdbe1b249ed54903e02d572a324e005bbc15 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Tue, 15 Oct 2024 17:49:28 +0530
Subject: [PATCH 103/107] low level changes, transform fixes

---
 src/BPINN_ode.jl        | 282 ++++++++++++++++++
 src/advancedHMC_MCMC.jl | 645 ++++++++++++++++++++++++++++++++++++++++
 test/BPINN_PDE_tests.jl |  11 +-
 test/BPINN_Tests.jl     |  97 ++----
 4 files changed, 950 insertions(+), 85 deletions(-)

diff --git a/src/BPINN_ode.jl b/src/BPINN_ode.jl
index e69de29bb2..39bb0aac72 100644
--- a/src/BPINN_ode.jl
+++ b/src/BPINN_ode.jl
@@ -0,0 +1,282 @@
+# HIGH level API for BPINN ODE solver
+
+"""
+    BNNODE(chain, Kernel = HMC; strategy = nothing, draw_samples = 2000,
+                        priorsNNw = (0.0, 2.0), param = [nothing], l2std = [0.05],
+                        phystd = [0.05], dataset = [nothing], physdt = 1 / 20.0,
+                        MCMCargs = (n_leapfrog=30), nchains = 1, init_params = nothing,
+                        Adaptorkwargs = (Adaptor = StanHMCAdaptor, targetacceptancerate = 0.8, Metric = DiagEuclideanMetric),
+                        Integratorkwargs = (Integrator = Leapfrog,), autodiff = false,
+                        progress = false, verbose = false)
+
+Algorithm for solving ordinary differential equations using a Bayesian neural network. This is a specialization
+of the physics-informed neural network which is used as a solver for a standard `ODEProblem`.
+
+!!! warn
+
+    Note that BNNODE only supports ODEs which are written in the out-of-place form, i.e.
+    `du = f(u,p,t)`, and not `f(du,u,p,t)`. If not declared out-of-place, then the BNNODE
+    will exit with an error.
+
+## Positional Arguments
+
+* `chain`: A neural network architecture, defined as a `Lux.AbstractExplicitLayer`.
+* `Kernel`: Choice of MCMC Sampling Algorithm. Defaults to `AdvancedHMC.HMC`
+
+## Keyword Arguments
+(refer `NeuralPDE.ahmc_bayesian_pinn_ode` keyword arguments.)
+
+## Example
+
+```julia
+linear = (u, p, t) -> -u / p[1] + exp(t / p[2]) * cos(t)
+tspan = (0.0, 10.0)
+u0 = 0.0
+p = [5.0, -5.0]
+prob = ODEProblem(linear, u0, tspan, p)
+linear_analytic = (u0, p, t) -> exp(-t / 5) * (u0 + sin(t))
+
+sol = solve(prob, Tsit5(); saveat = 0.05)
+u = sol.u[1:100]
+time = sol.t[1:100]
+x̂ = u .+ (u .* 0.2) .* randn(size(u))
+dataset = [x̂, time]
+
+chainlux = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh), Lux.Dense(6, 1))
+
+alg = BNNODE(chainlux, draw_samples = 2000,
+                       l2std = [0.05], phystd = [0.05],
+                       priorsNNw = (0.0, 3.0), progress = true)
+
+sol_lux = solve(prob, alg)
+
+# with parameter estimation
+alg = BNNODE(chainlux,dataset = dataset,
+                draw_samples = 2000,l2std = [0.05],
+                phystd = [0.05],priorsNNw = (0.0, 10.0),
+                param = [Normal(6.5, 0.5), Normal(-3, 0.5)],
+                progress = true)
+
+sol_lux_pestim = solve(prob, alg)
+```
+
+## Solution Notes
+
+Note that the solution is evaluated at fixed time points according to the strategy chosen.
+ensemble solution is evaluated and given at steps of `saveat`.
+Dataset should only be provided when ODE parameter Estimation is being done.
+The neural network is a fully continuous solution so `BPINNsolution`
+is an accurate interpolation (up to the neural network training result). In addition, the
+`BPINNstats` is returned as `sol.fullsolution` for further analysis.
+
+## References
+
+Liu Yanga, Xuhui Menga, George Em Karniadakis. "B-PINNs: Bayesian Physics-Informed Neural Networks for
+Forward and Inverse PDE Problems with Noisy Data".
+
+Kevin Linka, Amelie Schäfer, Xuhui Meng, Zongren Zou, George Em Karniadakis, Ellen Kuhl
+"Bayesian Physics Informed Neural Networks for real-world nonlinear dynamical systems".
+"""
+struct BNNODE{C, K, IT <: NamedTuple,
+    A <: NamedTuple, H <: NamedTuple,
+    ST <: Union{Nothing, AbstractTrainingStrategy},
+    I <: Union{Nothing, <:NamedTuple, Vector{<:AbstractFloat}},
+    P <: Union{Nothing, Vector{<:Distribution}},
+    D <:
+    Union{Vector{Nothing}, Vector{<:Vector{<:AbstractFloat}}}} <:
+       NeuralPDEAlgorithm
+    chain::C
+    Kernel::K
+    strategy::ST
+    draw_samples::Int64
+    priorsNNw::Tuple{Float64, Float64}
+    param::P
+    l2std::Vector{Float64}
+    phystd::Vector{Float64}
+    phynewstd::Vector{Float64}
+    dataset::D
+    physdt::Float64
+    MCMCkwargs::H
+    nchains::Int64
+    init_params::I
+    Adaptorkwargs::A
+    Integratorkwargs::IT
+    numensemble::Int64
+    estim_collocate::Bool
+    autodiff::Bool
+    progress::Bool
+    verbose::Bool
+end
+function BNNODE(chain, Kernel = HMC; strategy = nothing, draw_samples = 2000,
+        priorsNNw = (0.0, 2.0), param = nothing, l2std = [0.05], phystd = [0.05], phynewstd = [0.05],
+        dataset = [nothing], physdt = 1 / 20.0, MCMCkwargs = (n_leapfrog = 30,), nchains = 1,
+        init_params = nothing,
+        Adaptorkwargs = (Adaptor = StanHMCAdaptor,
+            Metric = DiagEuclideanMetric,
+            targetacceptancerate = 0.8),
+        Integratorkwargs = (Integrator = Leapfrog,),
+        numensemble = floor(Int, draw_samples / 3),
+        estim_collocate = false,
+        autodiff = false, progress = false, verbose = false)
+    !(chain isa Lux.AbstractExplicitLayer) &&
+        (chain = adapt(FromFluxAdaptor(false, false), chain))
+    BNNODE(chain, Kernel, strategy,
+        draw_samples, priorsNNw, param, l2std,
+        phystd, phynewstd, dataset, physdt, MCMCkwargs,
+        nchains, init_params,
+        Adaptorkwargs, Integratorkwargs,
+        numensemble, estim_collocate,
+        autodiff, progress, verbose)
+end
+
+"""
+Contains `ahmc_bayesian_pinn_ode()` function output:
+
+1. A MCMCChains.jl chain object for sampled parameters.
+2. The set of all sampled parameters.
+3. Statistics like:
+    - n_steps
+    - acceptance_rate
+    - log_density
+    - hamiltonian_energy
+    - hamiltonian_energy_error
+    - numerical_error
+    - step_size
+    - nom_step_size
+"""
+struct BPINNstats{MC, S, ST}
+    mcmc_chain::MC
+    samples::S
+    statistics::ST
+end
+
+"""
+BPINN Solution contains the original solution from AdvancedHMC.jl sampling (BPINNstats contains fields related to that).
+
+1. `ensemblesol` is the Probabilistic Estimate (MonteCarloMeasurements.jl Particles type) of Ensemble solution from All Neural Network's (made using all sampled parameters) output's.
+2. `estimated_nn_params` - Probabilistic Estimate of NN params from sampled weights, biases.
+3. `estimated_de_params` - Probabilistic Estimate of DE params from sampled unknown DE parameters.
+"""
+struct BPINNsolution{O <: BPINNstats, E, NP, OP, P}
+    original::O
+    ensemblesol::E
+    estimated_nn_params::NP
+    estimated_de_params::OP
+    timepoints::P
+
+    function BPINNsolution(original,
+            ensemblesol,
+            estimated_nn_params,
+            estimated_de_params,
+            timepoints)
+        new{typeof(original), typeof(ensemblesol), typeof(estimated_nn_params),
+            typeof(estimated_de_params), typeof(timepoints)}(
+            original, ensemblesol, estimated_nn_params,
+            estimated_de_params, timepoints)
+    end
+end
+
+function SciMLBase.__solve(prob::SciMLBase.ODEProblem,
+        alg::BNNODE,
+        args...;
+        dt = nothing,
+        timeseries_errors = true,
+        save_everystep = true,
+        adaptive = false,
+        abstol = 1.0f-6,
+        reltol = 1.0f-3,
+        verbose = false,
+        saveat = 1 / 50.0,
+        maxiters = nothing,)
+    @unpack chain, l2std, phystd, phynewstd, param, priorsNNw, Kernel, strategy,
+    draw_samples, dataset, init_params,
+    nchains, physdt, Adaptorkwargs, Integratorkwargs,
+    MCMCkwargs, numensemble, estim_collocate, autodiff, progress,
+    verbose = alg
+
+    # ahmc_bayesian_pinn_ode needs param=[] for easier vcat operation for full vector of parameters
+    param = param === nothing ? [] : param
+    strategy = strategy === nothing ? GridTraining : strategy
+
+    if draw_samples < 0
+        throw(error("Number of samples to be drawn has to be >=0."))
+    end
+
+    mcmcchain, samples, statistics = ahmc_bayesian_pinn_ode(prob, chain,
+        strategy = strategy, dataset = dataset,
+        draw_samples = draw_samples,
+        init_params = init_params,
+        physdt = physdt, phynewstd = phynewstd,
+        l2std = l2std,
+        phystd = phystd,
+        priorsNNw = priorsNNw,
+        param = param,
+        nchains = nchains,
+        autodiff = autodiff,
+        Kernel = Kernel,
+        Adaptorkwargs = Adaptorkwargs,
+        Integratorkwargs = Integratorkwargs,
+        MCMCkwargs = MCMCkwargs,
+        progress = progress,
+        verbose = verbose,
+        estim_collocate = estim_collocate)
+
+    fullsolution = BPINNstats(mcmcchain, samples, statistics)
+    ninv = length(param)
+    t = collect(eltype(saveat), prob.tspan[1]:saveat:prob.tspan[2])
+
+    if chain isa Lux.AbstractExplicitLayer
+        θinit, st = Lux.setup(Random.default_rng(), chain)
+        θ = [vector_to_parameters(samples[i][1:(end - ninv)], θinit)
+             for i in 1:max(draw_samples - draw_samples ÷ 10, draw_samples - 1000)]
+
+        luxar = [chain(t', θ[i], st)[1] for i in 1:numensemble]
+        # only need for size
+        θinit = collect(ComponentArrays.ComponentArray(θinit))
+    else
+        throw(error("Only Lux.AbstractExplicitLayer neural networks are supported"))
+    end
+
+    # constructing ensemble predictions
+    ensemblecurves = Vector{}[]
+    # check if NN output is more than 1
+    numoutput = size(luxar[1])[1]
+    if numoutput > 1
+        # Initialize a vector to store the separated outputs for each output dimension
+        output_matrices = [Vector{Vector{Float32}}() for _ in 1:numoutput]
+
+        # Loop through each element in `luxar`
+        for element in luxar
+            for i in 1:numoutput
+                push!(output_matrices[i], element[i, :])  # Append the i-th output (i-th row) to the i-th output_matrices
+            end
+        end
+
+        for r in 1:numoutput
+            ensem_r = hcat(output_matrices[r]...)'
+            ensemblecurve_r = prob.u0[r] .+
+                              [Particles(ensem_r[:, i]) for i in 1:length(t)] .*
+                              (t .- prob.tspan[1])
+            push!(ensemblecurves, ensemblecurve_r)
+        end
+
+    else
+        ensemblecurve = prob.u0 .+
+                        [Particles(reduce(vcat, luxar)[:, i]) for i in 1:length(t)] .*
+                        (t .- prob.tspan[1])
+        push!(ensemblecurves, ensemblecurve)
+    end
+
+    nnparams = length(θinit)
+    estimnnparams = [Particles(reduce(hcat, samples[(end - numensemble):end])[i, :])
+                     for i in 1:nnparams]
+
+    if ninv == 0
+        estimated_params = [nothing]
+    else
+        estimated_params = [Particles(reduce(hcat, samples[(end - numensemble):end])[i, :])
+                            for i in (nnparams + 1):(nnparams + ninv)]
+    end
+
+    BPINNsolution(fullsolution, ensemblecurves, estimnnparams, estimated_params, t)
+end
diff --git a/src/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl
index e69de29bb2..8b996fce5c 100644
--- a/src/advancedHMC_MCMC.jl
+++ b/src/advancedHMC_MCMC.jl
@@ -0,0 +1,645 @@
+mutable struct LogTargetDensity{C, S, ST <: AbstractTrainingStrategy, I,
+    P <: Vector{<:Distribution},
+    D <:
+    Union{Vector{Nothing}, Vector{<:Vector{<:AbstractFloat}}}
+}
+    dim::Int
+    prob::SciMLBase.ODEProblem
+    chain::C
+    st::S
+    strategy::ST
+    dataset::D
+    priors::P
+    phystd::Vector{Float64}
+    phynewstd::Vector{Float64}
+    l2std::Vector{Float64}
+    autodiff::Bool
+    physdt::Float64
+    extraparams::Int
+    init_params::I
+    estim_collocate::Bool
+
+    function LogTargetDensity(dim, prob, chain::Optimisers.Restructure, st, strategy,
+            dataset,
+            priors, phystd, phynewstd, l2std, autodiff, physdt, extraparams,
+            init_params::AbstractVector, estim_collocate)
+        new{
+            typeof(chain),
+            Nothing,
+            typeof(strategy),
+            typeof(init_params),
+            typeof(priors),
+            typeof(dataset)
+        }(dim,
+            prob,
+            chain,
+            nothing, strategy,
+            dataset,
+            priors,
+            phystd,
+            phynewstd,
+            l2std,
+            autodiff,
+            physdt,
+            extraparams,
+            init_params,
+            estim_collocate)
+    end
+    function LogTargetDensity(dim, prob, chain::Lux.AbstractExplicitLayer, st, strategy,
+            dataset,
+            priors, phystd, phynewstd, l2std, autodiff, physdt, extraparams,
+            init_params::NamedTuple, estim_collocate)
+        new{
+            typeof(chain),
+            typeof(st),
+            typeof(strategy),
+            typeof(init_params),
+            typeof(priors),
+            typeof(dataset)
+        }(dim,
+            prob,
+            chain, st, strategy,
+            dataset, priors,
+            phystd, phynewstd,
+             l2std,
+            autodiff,
+            physdt,
+            extraparams,
+            init_params,
+            estim_collocate)
+    end
+end
+
+"""
+Function needed for converting vector of sampled parameters into ComponentVector in case of Lux chain output, derivatives
+the sampled parameters are of exotic type `Dual` due to ForwardDiff's autodiff tagging.
+"""
+function vector_to_parameters(ps_new::AbstractVector,
+        ps::Union{NamedTuple, ComponentArrays.ComponentVector})
+    @assert length(ps_new) == Lux.parameterlength(ps)
+    i = 1
+    function get_ps(x)
+        z = reshape(view(ps_new, i:(i + length(x) - 1)), size(x))
+        i += length(x)
+        return z
+    end
+    return Functors.fmap(get_ps, ps)
+end
+
+vector_to_parameters(ps_new::AbstractVector, ps::AbstractVector) = ps_new
+
+function LogDensityProblems.logdensity(Tar::LogTargetDensity, θ)
+    if Tar.estim_collocate
+        return physloglikelihood(Tar, θ) + priorweights(Tar, θ) + L2LossData(Tar, θ) +
+               L2loss2(Tar, θ)
+    else
+        return physloglikelihood(Tar, θ) + priorweights(Tar, θ) + L2LossData(Tar, θ)
+    end
+end
+
+LogDensityProblems.dimension(Tar::LogTargetDensity) = Tar.dim
+
+function LogDensityProblems.capabilities(::LogTargetDensity)
+    LogDensityProblems.LogDensityOrder{1}()
+end
+
+"""
+suggested extra loss function for ODE solver case
+"""
+function L2loss2(Tar::LogTargetDensity, θ)
+    f = Tar.prob.f
+
+    # parameter estimation chosen or not
+    if Tar.extraparams > 0
+        autodiff = Tar.autodiff
+        # Timepoints to enforce Physics 
+        t = Tar.dataset[end]
+        u1 = Tar.dataset[2]
+        û = Tar.dataset[1]
+
+        nnsol = NNodederi(Tar, t, θ[1:(length(θ) - Tar.extraparams)], autodiff)
+
+        ode_params = Tar.extraparams == 1 ?
+                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
+                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
+
+        if length(Tar.prob.u0) == 1
+            physsol = [f(û[i],
+                ode_params,
+                t[i])
+                       for i in 1:length(û[:, 1])]
+        else
+            physsol = [f([û[i], u1[i]],
+                ode_params,
+                t[i])
+                       for i in 1:length(û)]
+        end
+        #form of NN output matrix output dim x n 
+        deri_physsol = reduce(hcat, physsol)
+   
+        physlogprob = 0
+        for i in 1:length(Tar.prob.u0)
+            # can add phystdnew[i] for u[i] 
+            physlogprob += logpdf(MvNormal(deri_physsol[i, :],
+                    LinearAlgebra.Diagonal(map(abs2,
+                        (Tar.phynewstd[i]) .*
+                        ones(length(nnsol[i, :]))))),
+                nnsol[i, :])
+        end
+        return physlogprob
+    else
+        return 0
+    end
+end
+
+"""
+L2 loss loglikelihood(needed for ODE parameter estimation).
+"""
+function L2LossData(Tar::LogTargetDensity, θ)
+    # check if dataset is provided
+    if Tar.dataset isa Vector{Nothing} || Tar.extraparams == 0
+        return 0
+    else
+        # matrix(each row corresponds to vector u's rows)
+        nn = Tar(Tar.dataset[end], θ[1:(length(θ) - Tar.extraparams)])
+
+        L2logprob = 0
+        for i in 1:length(Tar.prob.u0)
+            # for u[i] ith vector must be added to dataset, nn[1,:] is the dx in lotka_volterra
+            L2logprob += logpdf(
+                MvNormal(nn[i, :],
+                    LinearAlgebra.Diagonal(abs2.(Tar.l2std[i] .*
+                                                 ones(length(Tar.dataset[i]))))),
+                Tar.dataset[i])
+        end
+        return L2logprob
+    end
+end
+
+"""
+Physics loglikelihood over problem timespan + dataset timepoints.
+"""
+function physloglikelihood(Tar::LogTargetDensity, θ)
+    f = Tar.prob.f
+    p = Tar.prob.p
+    tspan = Tar.prob.tspan
+    autodiff = Tar.autodiff
+    strategy = Tar.strategy
+
+    # parameter estimation chosen or not
+    if Tar.extraparams > 0
+        ode_params = Tar.extraparams == 1 ?
+                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
+                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
+    else
+        ode_params = p == SciMLBase.NullParameters() ? [] : p
+    end
+
+    return getlogpdf(strategy, Tar, f, autodiff, tspan, ode_params, θ)
+end
+
+function getlogpdf(strategy::GridTraining, Tar::LogTargetDensity, f, autodiff::Bool,
+        tspan,
+        ode_params, θ)
+    if Tar.dataset isa Vector{Nothing}
+        t = collect(eltype(strategy.dx), tspan[1]:(strategy.dx):tspan[2])
+    else
+        t = vcat(collect(eltype(strategy.dx), tspan[1]:(strategy.dx):tspan[2]),
+            Tar.dataset[end])
+    end
+
+    sum(innerdiff(Tar, f, autodiff, t, θ,
+        ode_params))
+end
+
+function getlogpdf(strategy::StochasticTraining,
+        Tar::LogTargetDensity,
+        f,
+        autodiff::Bool,
+        tspan,
+        ode_params,
+        θ)
+    if Tar.dataset isa Vector{Nothing}
+        t = [(tspan[2] - tspan[1]) * rand() + tspan[1] for i in 1:(strategy.points)]
+    else
+        t = vcat([(tspan[2] - tspan[1]) * rand() + tspan[1] for i in 1:(strategy.points)],
+            Tar.dataset[end])
+    end
+
+    sum(innerdiff(Tar, f, autodiff, t, θ,
+        ode_params))
+end
+
+function getlogpdf(strategy::QuadratureTraining, Tar::LogTargetDensity, f,
+        autodiff::Bool,
+        tspan,
+        ode_params, θ)
+    function integrand(t::Number, θ)
+        innerdiff(Tar, f, autodiff, [t], θ, ode_params)
+    end
+    intprob = IntegralProblem(
+        integrand, (tspan[1], tspan[2]), θ; nout = length(Tar.prob.u0))
+    sol = solve(intprob, QuadGKJL(); abstol = strategy.abstol, reltol = strategy.reltol)
+    sum(sol.u)
+end
+
+function getlogpdf(strategy::WeightedIntervalTraining, Tar::LogTargetDensity, f,
+        autodiff::Bool,
+        tspan,
+        ode_params, θ)
+    minT = tspan[1]
+    maxT = tspan[2]
+
+    weights = strategy.weights ./ sum(strategy.weights)
+
+    N = length(weights)
+    points = strategy.points
+
+    difference = (maxT - minT) / N
+
+    data = Float64[]
+    for (index, item) in enumerate(weights)
+        temp_data = rand(1, trunc(Int, points * item)) .* difference .+ minT .+
+                    ((index - 1) * difference)
+        data = append!(data, temp_data)
+    end
+
+    if Tar.dataset isa Vector{Nothing}
+        t = data
+    else
+        t = vcat(data,
+            Tar.dataset[end])
+    end
+
+    sum(innerdiff(Tar, f, autodiff, t, θ,
+        ode_params))
+end
+
+"""
+MvNormal likelihood at each `ti` in time `t` for ODE collocation residue with NN with parameters θ.
+"""
+function innerdiff(Tar::LogTargetDensity, f, autodiff::Bool, t::AbstractVector, θ,
+        ode_params)
+
+    # Tar used for phi and LogTargetDensity object attributes access
+    out = Tar(t, θ[1:(length(θ) - Tar.extraparams)])
+
+    # # reject samples case(write clear reason why)
+    if any(isinf, out[:, 1]) || any(isinf, ode_params)
+        return -Inf
+    end
+
+    # this is a vector{vector{dx,dy}}(handle case single u(float passed))
+    if length(out[:, 1]) == 1
+        physsol = [f(out[:, i][1],
+                       ode_params,
+                       t[i])
+                   for i in 1:length(out[1, :])]
+    else
+        physsol = [f(out[:, i],
+                       ode_params,
+                       t[i])
+                   for i in 1:length(out[1, :])]
+    end
+    physsol = reduce(hcat, physsol)
+
+    nnsol = NNodederi(Tar, t, θ[1:(length(θ) - Tar.extraparams)], autodiff)
+
+    vals = nnsol .- physsol
+
+    # N dimensional vector if N outputs for NN(each row has logpdf of u[i] where u is vector of dependant variables)
+    return [logpdf(
+                MvNormal(vals[i, :],
+                    LinearAlgebra.Diagonal(abs2.(Tar.phystd[i] .*
+                                                 ones(length(vals[i, :]))))),
+                zeros(length(vals[i, :]))) for i in 1:length(Tar.prob.u0)]
+end
+
+"""
+Prior logpdf for NN parameters + ODE constants.
+"""
+function priorweights(Tar::LogTargetDensity, θ)
+    allparams = Tar.priors
+    # nn weights
+    nnwparams = allparams[1]
+
+    if Tar.extraparams > 0
+        # Vector of ode parameters priors
+        invpriors = allparams[2:end]
+
+        invlogpdf = sum(
+            logpdf(invpriors[length(θ) - i + 1], θ[i])
+            for i in (length(θ) - Tar.extraparams + 1):length(θ);
+            init = 0.0)
+
+        return (invlogpdf
+                +
+                logpdf(nnwparams, θ[1:(length(θ) - Tar.extraparams)]))
+    else
+        return logpdf(nnwparams, θ)
+    end
+end
+
+function generate_Tar(chain::Lux.AbstractExplicitLayer, init_params)
+    θ, st = Lux.setup(Random.default_rng(), chain)
+    return init_params, chain, st
+end
+
+function generate_Tar(chain::Lux.AbstractExplicitLayer, init_params::Nothing)
+    θ, st = Lux.setup(Random.default_rng(), chain)
+    return θ, chain, st
+end
+
+"""
+NN OUTPUT AT t,θ ~ phi(t,θ).
+"""
+function (f::LogTargetDensity{C, S})(t::AbstractVector,
+        θ) where {C <: Lux.AbstractExplicitLayer, S}
+    θ = vector_to_parameters(θ, f.init_params)
+    y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), t'), θ, f.st)
+    ChainRulesCore.@ignore_derivatives f.st = st
+    f.prob.u0 .+ (t' .- f.prob.tspan[1]) .* y
+end
+
+function (f::LogTargetDensity{C, S})(t::Number,
+        θ) where {C <: Lux.AbstractExplicitLayer, S}
+    θ = vector_to_parameters(θ, f.init_params)
+    y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), [t]), θ, f.st)
+    ChainRulesCore.@ignore_derivatives f.st = st
+    f.prob.u0 .+ (t .- f.prob.tspan[1]) .* y
+end
+
+"""
+Similar to ode_dfdx() in NNODE.
+"""
+function NNodederi(phi::LogTargetDensity, t::AbstractVector, θ, autodiff::Bool)
+    if autodiff
+        hcat(ForwardDiff.derivative.(ti -> phi(ti, θ), t)...)
+    else
+        (phi(t .+ sqrt(eps(eltype(t))), θ) - phi(t, θ)) ./ sqrt(eps(eltype(t)))
+    end
+end
+
+function kernelchoice(Kernel, MCMCkwargs)
+    if Kernel == HMCDA
+        δ, λ = MCMCkwargs[:δ], MCMCkwargs[:λ]
+        Kernel(δ, λ)
+    elseif Kernel == NUTS
+        δ, max_depth, Δ_max = MCMCkwargs[:δ], MCMCkwargs[:max_depth], MCMCkwargs[:Δ_max]
+        Kernel(δ, max_depth = max_depth, Δ_max = Δ_max)
+    else
+        # HMC
+        n_leapfrog = MCMCkwargs[:n_leapfrog]
+        Kernel(n_leapfrog)
+    end
+end
+
+"""
+    ahmc_bayesian_pinn_ode(prob, chain; strategy = GridTraining,
+                        dataset = [nothing],init_params = nothing,
+                        draw_samples = 1000, physdt = 1 / 20.0f0,l2std = [0.05],
+                        phystd = [0.05], phynewstd = [0.05], priorsNNw = (0.0, 2.0),
+                        param = [], nchains = 1, autodiff = false, Kernel = HMC,
+                        Adaptorkwargs = (Adaptor = StanHMCAdaptor,
+                                         Metric = DiagEuclideanMetric,
+                                         targetacceptancerate = 0.8),
+                        Integratorkwargs = (Integrator = Leapfrog,),
+                        MCMCkwargs = (n_leapfrog = 30,),
+                        progress = false, verbose = false)
+
+!!! warn
+
+    Note that `ahmc_bayesian_pinn_ode()` only supports ODEs which are written in the out-of-place form, i.e.
+    `du = f(u,p,t)`, and not `f(du,u,p,t)`. If not declared out-of-place, then the `ahmc_bayesian_pinn_ode()`
+    will exit with an error.
+
+## Example
+
+```julia
+linear = (u, p, t) -> -u / p[1] + exp(t / p[2]) * cos(t)
+tspan = (0.0, 10.0)
+u0 = 0.0
+p = [5.0, -5.0]
+prob = ODEProblem(linear, u0, tspan, p)
+
+### CREATE DATASET (Necessity for accurate Parameter estimation)
+sol = solve(prob, Tsit5(); saveat = 0.05)
+u = sol.u[1:100]
+time = sol.t[1:100]
+
+### dataset and BPINN create
+x̂ = collect(Float64, Array(u) + 0.05 * randn(size(u)))
+dataset = [x̂, time]
+
+chain1 = Lux.Chain(Lux.Dense(1, 5, tanh), Lux.Dense(5, 5, tanh), Lux.Dense(5, 1)
+
+### simply solving ode here hence better to not pass dataset(uses ode params specified in prob)
+fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chain1,
+                                                            dataset = dataset,
+                                                            draw_samples = 1500,
+                                                            l2std = [0.05],
+                                                            phystd = [0.05],
+                                                            priorsNNw = (0.0,3.0))
+
+### solving ode + estimating parameters hence dataset needed to optimize parameters upon + Pior Distributions for ODE params
+fh_mcmc_chain2, fhsamples2, fhstats2 = ahmc_bayesian_pinn_ode(prob, chain1,
+                                                            dataset = dataset,
+                                                            draw_samples = 1500,
+                                                            l2std = [0.05],
+                                                            phystd = [0.05],
+                                                            priorsNNw = (0.0,3.0),
+                                                            param = [Normal(6.5,0.5), Normal(-3,0.5)])
+```
+
+## NOTES
+
+Dataset is required for accurate Parameter estimation + solving equations
+Incase you are only solving the Equations for solution, do not provide dataset
+
+## Positional Arguments
+
+* `prob`: DEProblem(out of place and the function signature should be f(u,p,t).
+* `chain`: Lux Neural Netork which would be made the Bayesian PINN.
+
+## Keyword Arguments
+
+* `strategy`: The training strategy used to choose the points for the evaluations. By default GridTraining is used with given physdt discretization.
+* `init_params`: initial parameter values for BPINN (ideally for multiple chains different initializations preferred)
+* `nchains`: number of chains you want to sample
+* `draw_samples`: number of samples to be drawn in the MCMC algorithms (warmup samples are ~2/3 of draw samples)
+* `l2std`: standard deviation of BPINN prediction against L2 losses/Dataset
+* `phystd`: standard deviation of BPINN prediction against Chosen Underlying ODE System
+* `phynewstd`: standard deviation of new loss func term
+* `priorsNNw`: Tuple of (mean, std) for BPINN Network parameters. Weights and Biases of BPINN are Normal Distributions by default.
+* `param`: Vector of chosen ODE parameters Distributions in case of Inverse problems.
+* `autodiff`: Boolean Value for choice of Derivative Backend(default is numerical)
+* `physdt`: Timestep for approximating ODE in it's Time domain. (1/20.0 by default)
+* `Kernel`: Choice of MCMC Sampling Algorithm (AdvancedHMC.jl implementations HMC/NUTS/HMCDA)
+* `Integratorkwargs`: `Integrator`, `jitter_rate`, `tempering_rate`. Refer: https://turinglang.org/AdvancedHMC.jl/stable/
+* `Adaptorkwargs`: `Adaptor`, `Metric`, `targetacceptancerate`. Refer: https://turinglang.org/AdvancedHMC.jl/stable/
+    Note: Target percentage(in decimal) of iterations in which the proposals are accepted (0.8 by default)
+* `MCMCargs`: A NamedTuple containing all the chosen MCMC kernel's(HMC/NUTS/HMCDA) Arguments, as follows :
+    * `n_leapfrog`: number of leapfrog steps for HMC
+    * `δ`: target acceptance probability for NUTS and HMCDA
+    * `λ`: target trajectory length for HMCDA
+    * `max_depth`: Maximum doubling tree depth (NUTS)
+    * `Δ_max`: Maximum divergence during doubling tree (NUTS)
+    Refer: https://turinglang.org/AdvancedHMC.jl/stable/
+* `progress`: controls whether to show the progress meter or not.
+* `verbose`: controls the verbosity. (Sample call args in AHMC)
+
+## Warnings
+
+* AdvancedHMC.jl is still developing convenience structs so might need changes on new releases.
+"""
+function ahmc_bayesian_pinn_ode(prob::SciMLBase.ODEProblem, chain;
+        strategy = GridTraining, dataset = [nothing],
+        init_params = nothing, draw_samples = 1000,
+        physdt = 1 / 20.0, l2std = [0.05],
+        phystd = [0.05], phynewstd = [0.05], priorsNNw = (0.0, 2.0),
+        param = [], nchains = 1, autodiff = false,
+        Kernel = HMC,
+        Adaptorkwargs = (Adaptor = StanHMCAdaptor,
+            Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
+        Integratorkwargs = (Integrator = Leapfrog,),
+        MCMCkwargs = (n_leapfrog = 30,),
+        progress = false, verbose = false,
+        estim_collocate = false)
+    !(chain isa Lux.AbstractExplicitLayer) &&
+        (chain = adapt(FromFluxAdaptor(false, false), chain))
+    # NN parameter prior mean and variance(PriorsNN must be a tuple)
+    if isinplace(prob)
+        throw(error("The BPINN ODE solver only supports out-of-place ODE definitions, i.e. du=f(u,p,t)."))
+    end
+
+    strategy = strategy == GridTraining ? strategy(physdt) : strategy
+
+    if dataset != [nothing] &&
+       (length(dataset) < 2 || !(dataset isa Vector{<:Vector{<:AbstractFloat}}))
+        throw(error("Invalid dataset. dataset would be timeseries (x̂,t) where type: Vector{Vector{AbstractFloat}"))
+    end
+
+    if dataset != [nothing] && param == []
+        println("Dataset is only needed for Parameter Estimation + Forward Problem, not in only Forward Problem case.")
+    elseif dataset == [nothing] && param != []
+        throw(error("Dataset Required for Parameter Estimation."))
+    end
+
+    if chain isa Lux.AbstractExplicitLayer
+        # Lux-Named Tuple
+        initial_nnθ, recon, st = generate_Tar(chain, init_params)
+    else
+        error("Only Lux.AbstractExplicitLayer Neural networks are supported")
+    end
+
+    if nchains > Threads.nthreads()
+        throw(error("number of chains is greater than available threads"))
+    elseif nchains < 1
+        throw(error("number of chains must be greater than 1"))
+    end
+
+    # eltype(physdt) cause needs Float64 for find_good_stepsize
+    # Lux chain(using component array later as vector_to_parameter need namedtuple)
+    initial_θ = collect(eltype(physdt),
+        vcat(ComponentArrays.ComponentArray(initial_nnθ)))
+
+    # adding ode parameter estimation
+    nparameters = length(initial_θ)
+    ninv = length(param)
+    priors = [
+        MvNormal(priorsNNw[1] * ones(nparameters),
+        LinearAlgebra.Diagonal(abs2.(priorsNNw[2] .* ones(nparameters))))
+    ]
+
+    # append Ode params to all paramvector
+    if ninv > 0
+        # shift ode params(initialise ode params by prior means)
+        initial_θ = vcat(initial_θ, [Distributions.params(param[i])[1] for i in 1:ninv])
+        priors = vcat(priors, param)
+        nparameters += ninv
+    end
+
+    t0 = prob.tspan[1]
+    # dimensions would be total no of params,initial_nnθ for Lux namedTuples
+    ℓπ = LogTargetDensity(nparameters, prob, recon, st, strategy, dataset, priors,
+        phystd, phynewstd, l2std, autodiff, physdt, ninv, initial_nnθ, estim_collocate)
+
+    try
+        ℓπ(t0, initial_θ[1:(nparameters - ninv)])
+    catch err
+        if isa(err, DimensionMismatch)
+            throw(DimensionMismatch("Dimensions of the initial u0 and chain should match"))
+        else
+            throw(err)
+        end
+    end
+
+    @info("Current Physics Log-likelihood : ", physloglikelihood(ℓπ, initial_θ))
+    @info("Current Prior Log-likelihood : ", priorweights(ℓπ, initial_θ))
+    @info("Current MSE against dataset Log-likelihood : ", L2LossData(ℓπ, initial_θ))
+    if estim_collocate
+        @info("Current gradient loss against dataset Log-likelihood : ",
+            L2loss2(ℓπ, initial_θ))
+    end
+
+    Adaptor, Metric, targetacceptancerate = Adaptorkwargs[:Adaptor],
+    Adaptorkwargs[:Metric], Adaptorkwargs[:targetacceptancerate]
+
+    # Define Hamiltonian system (nparameters ~ dimensionality of the sampling space)
+    metric = Metric(nparameters)
+    hamiltonian = Hamiltonian(metric, ℓπ, ForwardDiff)
+
+    # parallel sampling option
+    if nchains != 1
+        # Cache to store the chains
+        chains = Vector{Any}(undef, nchains)
+        statsc = Vector{Any}(undef, nchains)
+        samplesc = Vector{Any}(undef, nchains)
+
+        Threads.@threads for i in 1:nchains
+            # each chain has different initial NNparameter values(better posterior exploration)
+            initial_θ = vcat(randn(nparameters - ninv),
+                initial_θ[(nparameters - ninv + 1):end])
+            initial_ϵ = find_good_stepsize(hamiltonian, initial_θ)
+            integrator = integratorchoice(Integratorkwargs, initial_ϵ)
+            adaptor = adaptorchoice(Adaptor, MassMatrixAdaptor(metric),
+                StepSizeAdaptor(targetacceptancerate, integrator))
+
+            MCMC_alg = kernelchoice(Kernel, MCMCkwargs)
+            Kernel = AdvancedHMC.make_kernel(MCMC_alg, integrator)
+            samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples, adaptor;
+                progress = progress, verbose = verbose)
+
+            samplesc[i] = samples
+            statsc[i] = stats
+            mcmc_chain = Chains(hcat(samples...)')
+            chains[i] = mcmc_chain
+        end
+
+        return chains, samplesc, statsc
+    else
+        initial_ϵ = find_good_stepsize(hamiltonian, initial_θ)
+        integrator = integratorchoice(Integratorkwargs, initial_ϵ)
+        adaptor = adaptorchoice(Adaptor, MassMatrixAdaptor(metric),
+            StepSizeAdaptor(targetacceptancerate, integrator))
+
+        MCMC_alg = kernelchoice(Kernel, MCMCkwargs)
+        Kernel = AdvancedHMC.make_kernel(MCMC_alg, integrator)
+        samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples,
+            adaptor; progress = progress, verbose = verbose)
+
+        @info("Sampling Complete.")
+        @info("Final Physics Log-likelihood : ", physloglikelihood(ℓπ, samples[end]))
+        @info("Final Prior Log-likelihood : ", priorweights(ℓπ, samples[end]))
+        @info("Final MSE against dataset Log-likelihood : ", L2LossData(ℓπ, samples[end]))
+        if estim_collocate
+            @info("Final gradient loss against dataset Log-likelihood : ",
+                L2loss2(ℓπ, samples[end]))
+        end
+
+        # return a chain(basic chain),samples and stats
+        matrix_samples = reshape(hcat(samples...), (length(samples[1]), length(samples), 1))
+        mcmc_chain = MCMCChains.Chains(matrix_samples)
+        return mcmc_chain, samples, stats
+    end
+end
diff --git a/test/BPINN_PDE_tests.jl b/test/BPINN_PDE_tests.jl
index ae97ee3751..d9ce948677 100644
--- a/test/BPINN_PDE_tests.jl
+++ b/test/BPINN_PDE_tests.jl
@@ -181,10 +181,10 @@ end
 
     sol1 = ahmc_bayesian_pinn_pde(pde_system,
         discretization;
-        draw_samples = 400,
-        bcstd = [0.05, 0.05, 0.05, 0.05],
-        phystd = [0.05],
-        priorsNNw = (0.0, 1.0),
+        draw_samples = 200,
+        bcstd = [0.0025, 0.0025, 0.0025, 0.0025],
+        phystd = [0.005],
+        priorsNNw = (0.0, 0.5),
         saveats = [1 / 100.0, 1 / 100.0])
 
     xs = sol.timepoints[1]
@@ -193,8 +193,9 @@ end
     u_predict = pmean(sol.ensemblesol[1])
     u_real = [analytic_sol_func(xs[:, i][1], xs[:, i][2]) for i in 1:length(xs[1, :])]
 
+    @test mean(abs2.(u_predict .- u_real)) < 5e-3
+    @test all(abs.(u_predict .- u_real) .< 15e-3)
     @test sum(abs2.(u_predict .- u_real)) < 0.1
-    @test u_predict≈u_real atol=0.1
 end
 
 @testitem "BPINN PDE: Translating from Flux" tags=[:pdebpinn] begin
diff --git a/test/BPINN_Tests.jl b/test/BPINN_Tests.jl
index d53553f29e..3454a37b99 100644
--- a/test/BPINN_Tests.jl
+++ b/test/BPINN_Tests.jl
@@ -272,7 +272,7 @@ end
         dataset = dataset,
         draw_samples = 1000,
         l2std = [0.1],
-        phystd = [0.03],
+        phystd = [0.01],
         priorsNNw = (0.0,
             1.0),
         param = [
@@ -284,7 +284,8 @@ end
         dataset = dataset,
         draw_samples = 1000,
         l2std = [0.1],
-        phystd = [0.03],
+        phystd = [0.01],
+        phynewstd = [0.01],
         priorsNNw = (0.0,
             1.0),
         param = [
@@ -295,114 +296,50 @@ end
         dataset = dataset,
         draw_samples = 1000,
         l2std = [0.1],
-        phystd = [0.03],
+        phystd = [0.01],
+        phynewstd = [0.05],
         priorsNNw = (0.0,
             1.0),
         param = [
             Normal(-7, 3)
-        ], estim_collocate = true)
+        ], numensemble = 200,
+        estim_collocate = true)
 
     sol3lux_pestim = solve(prob, alg)
 
     # testing timepoints
     t = sol.t
     #------------------------------ ahmc_bayesian_pinn_ode() call
-    # Mean of last 500 sampled parameter's curves(lux chains)[Ensemble predictions]
+    # Mean of last 200 sampled parameter's curves(lux chains)[Ensemble predictions]
     θ = [vector_to_parameters(fhsampleslux12[i][1:(end - 1)], θinit)
-         for i in 750:length(fhsampleslux12)]
+         for i in 800:length(fhsampleslux12)]
     luxar = [chainlux12(t', θ[i], st)[1] for i in eachindex(θ)]
     luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
     meanscurve2_1 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
 
     θ = [vector_to_parameters(fhsampleslux22[i][1:(end - 1)], θinit)
-         for i in 750:length(fhsampleslux22)]
+         for i in 800:length(fhsampleslux22)]
     luxar = [chainlux12(t', θ[i], st)[1] for i in eachindex(θ)]
     luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)]
     meanscurve2_2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean
 
-    @test mean(abs.(sol.u .- meanscurve2_2)) < 6e-2
-    @test mean(abs.(physsol1 .- meanscurve2_2)) < 6e-2
+    @test mean(abs.(sol.u .- meanscurve2_2)) < 5e-2
+    @test mean(abs.(physsol1 .- meanscurve2_2)) < 5e-2
     @test mean(abs.(sol.u .- meanscurve2_1)) > mean(abs.(sol.u .- meanscurve2_2))
     @test mean(abs.(physsol1 .- meanscurve2_1)) > mean(abs.(physsol1 .- meanscurve2_2))
 
     # estimated parameters(lux chain)
-    param2 = mean(i[62] for i in fhsampleslux22[750:length(fhsampleslux22)])
-    @test abs(param2 - p) < abs(0.25 * p)
+    param2 = mean(i[62] for i in fhsampleslux22[800:length(fhsampleslux22)])
+    @test abs(param2 - p) < abs(0.2 * p)
 
-    param1 = mean(i[62] for i in fhsampleslux12[750:length(fhsampleslux12)])
-    @test abs(param1 - p) < abs(0.75 * p)
+    param1 = mean(i[62] for i in fhsampleslux12[800:length(fhsampleslux12)])
+    @test !(abs(param1 - p) < abs(0.2 * p))
     @test abs(param2 - p) < abs(param1 - p)
 
     #-------------------------- solve() call 
     # (lux chain)
-    @test mean(abs.(physsol2 .- pmean(sol3lux_pestim.ensemblesol[1]))) < 0.1
+    @test mean(abs.(physsol2 .- pmean(sol3lux_pestim.ensemblesol[1]))) < 5e-2
     # estimated parameters(lux chain)
     param3 = sol3lux_pestim.estimated_de_params[1]
     @test abs(param3 - p) < abs(0.2 * p)
-end
-
-@testset "Example 4 - improvement" begin
-    function lotka_volterra(u, p, t)
-        # Model parameters.
-        α, β, γ, δ = p
-        # Current state.
-        x, y = u
-
-        # Evaluate differential equations.
-        dx = (α - β * y) * x # prey
-        dy = (δ * x - γ) * y # predator
-
-        return [dx, dy]
-    end
-
-    # initial-value problem.
-    u0 = [1.0, 1.0]
-    p = [1.5, 1.0, 3.0, 1.0]
-    tspan = (0.0, 4.0)
-    prob = ODEProblem(lotka_volterra, u0, tspan, p)
-
-    # Solve using OrdinaryDiffEq.jl solver
-    dt = 0.2
-    solution = solve(prob, Tsit5(); saveat = dt)
-
-    times = solution.t
-    u = hcat(solution.u...)
-    x = u[1, :] + (0.8 .* randn(length(u[1, :])))
-    y = u[2, :] + (0.8 .* randn(length(u[2, :])))
-    dataset = [x, y, times]
-
-    chain = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),
-        Lux.Dense(6, 2))
-
-    alg1 = BNNODE(chain;
-        dataset = dataset,
-        draw_samples = 1000,
-        l2std = [0.2, 0.2],
-        phystd = [0.1, 0.1],
-        priorsNNw = (0.0, 1.0),
-        param = [
-            Normal(2, 0.5),
-            Normal(2, 0.5),
-            Normal(2, 0.5),
-            Normal(2, 0.5)])
-
-    alg2 = BNNODE(chain;
-        dataset = dataset,
-        draw_samples = 1000,
-        l2std = [0.2, 0.2],
-        phystd = [0.1, 0.1],
-        priorsNNw = (0.0, 1.0),
-        param = [
-            Normal(2, 0.5),
-            Normal(2, 0.5),
-            Normal(2, 0.5),
-            Normal(2, 0.5)], estim_collocate = true)
-
-    @time sol_pestim1 = solve(prob, alg1; saveat = dt)
-    @time sol_pestim2 = solve(prob, alg2; saveat = dt)
-
-    unsafe_comparisons(true)
-    bitvec = abs.(p .- sol_pestim1.estimated_de_params) .>
-             abs.(p .- sol_pestim2.estimated_de_params)
-    @test bitvec == ones(size(bitvec))
 end
\ No newline at end of file

From 08c4825fb1eacfc0a2a3032f8511b28dc5224a56 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Fri, 18 Oct 2024 13:53:23 +0530
Subject: [PATCH 104/107] changes from reviews

---
 test/BPINN_Tests.jl | 167 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 165 insertions(+), 2 deletions(-)

diff --git a/test/BPINN_Tests.jl b/test/BPINN_Tests.jl
index 3454a37b99..6ef8294216 100644
--- a/test/BPINN_Tests.jl
+++ b/test/BPINN_Tests.jl
@@ -6,7 +6,7 @@ using Statistics, Random, Functors, ComponentArrays
 using NeuralPDE, MonteCarloMeasurements
 using Flux
 
-# note that current testing bounds can be easily further tightened but have been inflated for support for Julia build v1
+# note that current testing bounds can be further tightened but have been inflated for support for Julia build v1
 # on latest Julia version it performs much better for below tests
 Random.seed!(100)
 
@@ -342,4 +342,167 @@ end
     # estimated parameters(lux chain)
     param3 = sol3lux_pestim.estimated_de_params[1]
     @test abs(param3 - p) < abs(0.2 * p)
-end
\ No newline at end of file
+end
+
+@testset "Example 4 - improvement" begin
+    function lotka_volterra(u, p, t)
+        # Model parameters.
+        α, β, γ, δ = p
+        # Current state.
+        x, y = u
+
+        # Evaluate differential equations.
+        dx = (1 - β * y) * x * α # prey
+        dy = (δ * x - 1) * y * γ  # predator
+
+        return [dx, dy]
+    end
+
+    # initial-value problem.
+    u0 = [1.0, 1.0]
+    p = [1.5, 2 / 3, 3.0, 1 / 3]
+    tspan = (0.0, 4.0)
+    prob = ODEProblem(lotka_volterra, u0, tspan, p)
+
+    # Solve using OrdinaryDiffEq.jl solver
+    dt = 0.2
+    solution = solve(prob, Tsit5(); saveat = dt)
+
+    times = solution.t
+    u = hcat(solution.u...)
+    x = u[1, :] + (0.8 .* randn(length(u[1, :])))
+    y = u[2, :] + (0.8 .* randn(length(u[2, :])))
+    dataset = [x, y, times]
+
+    chain = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),
+        Lux.Dense(6, 2))
+
+    alg1 = BNNODE(chain;
+        dataset = dataset,
+        draw_samples = 1000,
+        l2std = [0.05, 0.05],
+        phystd = [0.2, 0.2],
+        priorsNNw = (0.0, 1.0),
+        param = [
+            Normal(2, 0.5),
+            Normal(2, 0.5),
+            Normal(2, 0.5),
+            Normal(2, 0.5)])
+
+    alg2 = BNNODE(chain;
+        dataset = dataset,
+        draw_samples = 1000,
+        l2std = [0.05, 0.05],
+        phystd = [0.2, 0.2],
+        phynewstd = [0.3, 0.1],
+        priorsNNw = (0.0, 1.0),
+        param = [
+            Normal(2, 0.5),
+            Normal(2, 0.5),
+            Normal(2, 0.5),
+            Normal(2, 0.5)], estim_collocate = true)
+
+    @time sol_pestim1 = solve(prob, alg1; saveat = dt)
+    @time sol_pestim2 = solve(prob, alg2; saveat = dt)
+
+    unsafe_comparisons(true)
+    bitvec = abs.(p .- sol_pestim1.estimated_de_params) .>
+             abs.(p .- sol_pestim2.estimated_de_params)
+    @test bitvec == ones(size(bitvec))
+end
+
+function lotka_volterra(u, p, t)
+    # Model parameters.
+    β, δ = p
+    # Current state.
+    x, y = u
+
+    # Evaluate differential equations.
+    dx = (3 - β * y) * x # prey
+    dy = (δ * x - 3) * y # predator
+
+    return [dx, dy]
+end
+
+# initial-value problem.
+u0 = [1.0, 1.0]
+p = [2, 1]
+tspan = (0.0, 4.0)
+
+prob = ODEProblem(lotka_volterra, u0, tspan, p)
+
+# Solve using OrdinaryDiffEq.jl solver
+dt = 0.01
+solution = solve(prob, Tsit5(); saveat = dt)
+
+times = solution.t
+u = hcat(solution.u...)
+x = u[1, :] + (0.4 .* randn(length(u[1, :])))
+y = u[2, :] + (0.4 .* randn(length(u[2, :])))
+dataset = [x, y, times]
+scatter!(times, x)
+scatter!(times, y)
+chain = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh),
+    Lux.Dense(6, 2))
+
+alg1 = BNNODE(chain;
+    dataset = dataset,
+    draw_samples = 1000,
+    l2std = [0.1, 0.1],
+    phystd = [0.2, 0.2],
+    priorsNNw = (0.0, 1.0),
+    param = [
+        Normal(1,1),
+        Normal(1,1),], progress = true)
+
+alg2 = BNNODE(chain;
+    dataset = dataset,
+    draw_samples = 1000,
+    l2std = [0.1, 0.1],
+    phystd = [0.2, 0.2],
+    phynewstd = [0.2, 0.2],
+    priorsNNw = (0.0, 1.0),
+    param = [
+        Normal(1,1),
+        Normal(1,1)], estim_collocate = true, progress = true)
+
+@time sol_pestim1 = solve(prob, alg1; saveat = dt)
+@time sol_pestim2 = solve(prob, alg2; saveat = dt)
+
+unsafe_comparisons(true)
+bitvec = abs.(p .- pmean(sol_pestim1.estimated_de_params)) .>
+         abs.(p .- pmean(sol_pestim2.estimated_de_params))
+@test bitvec == ones(size(bitvec))
+
+
+pmean(sol_pestim1.estimated_de_params)
+
+sol_pestim2.estimated_de_params
+sol_pestim1.estimated_de_params
+sol_pestim2.estimated_de_params
+
+sol_pestim1.estimated_de_params
+sol_pestim2.estimated_de_params
+
+sol_pestim1.estimated_de_params
+sol_pestim2.estimated_de_params
+
+p
+sol_pestim1.timepoints
+plot!(sol_pestim1.timepoints, sol_pestim1.ensemblesol[1])
+plot!(sol_pestim2.timepoints, sol_pestim2.ensemblesol[1])
+plot!(sol_pestim1.timepoints, sol_pestim1.ensemblesol[2])
+plot!(sol_pestim2.timepoints, sol_pestim2.ensemblesol[2])
+
+plot!(sol_pestim1.timepoints, pmean(sol_pestim1.ensemblesol[1]))
+plot!(sol_pestim2.timepoints, pmean(sol_pestim2.ensemblesol[1]))
+plot!(sol_pestim1.timepoints, pmean(sol_pestim1.ensemblesol[2]))
+plot!(sol_pestim2.timepoints, pmean(sol_pestim2.ensemblesol[2]))
+
+
+plot(times, u[1, :])
+plot!(times, u[2, :])
+
+plot(sol_pestim1.ensemblesol)
+
+# Parametric PDEs are ill posed problems as, non convex optimization and non global minima might be our solution
\ No newline at end of file

From 50a36f78560ff1a1b47a4187a639af0d8e1f94c7 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Fri, 18 Oct 2024 20:32:57 +0530
Subject: [PATCH 105/107] conflicts

---
 Project.toml     |  11 ++-
 src/BPINN_ode.jl | 214 ++++++++++++++++++-----------------------------
 2 files changed, 91 insertions(+), 134 deletions(-)

diff --git a/Project.toml b/Project.toml
index 304d6efb97..ab2a6ae2f6 100644
--- a/Project.toml
+++ b/Project.toml
@@ -114,20 +114,29 @@ Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 DiffEqNoiseProcess = "77a26b50-5914-5dd7-bc55-306e6241c503"
 ExplicitImports = "7d51a73a-1435-4ff3-83d9-f097790105c7"
+DiffEqNoiseProcess = "77a26b50-5914-5dd7-bc55-306e6241c503"
+ExplicitImports = "7d51a73a-1435-4ff3-83d9-f097790105c7"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 Hwloc = "0e44f5e4-bd66-52a0-8798-143a42290a1d"
 InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
+Hwloc = "0e44f5e4-bd66-52a0-8798-143a42290a1d"
+InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 LineSearches = "d3d80556-e9d4-5f37-9878-2ab0fcc64255"
 LuxCUDA = "d0bbae9a-e099-4d5b-a835-1c6931763bda"
 LuxCore = "bb33d45b-7691-41d6-9220-0943567d0623"
 LuxLib = "82251201-b29d-42c6-8e01-566dec8acb11"
+LuxCore = "bb33d45b-7691-41d6-9220-0943567d0623"
+LuxLib = "82251201-b29d-42c6-8e01-566dec8acb11"
 MethodOfLines = "94925ecb-adb7-4558-8ed8-f975c56a0bf4"
 OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e"
 OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
 ReTestItems = "817f1d60-ba6b-4fd5-9520-3cf149f6a823"
 StochasticDiffEq = "789caeaf-c7a9-5a7d-9973-96adeb23e2a0"
 TensorBoardLogger = "899adc3e-224a-11e9-021f-63837185c80f"
+ReTestItems = "817f1d60-ba6b-4fd5-9520-3cf149f6a823"
+StochasticDiffEq = "789caeaf-c7a9-5a7d-9973-96adeb23e2a0"
+TensorBoardLogger = "899adc3e-224a-11e9-021f-63837185c80f"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Aqua", "CUDA", "DiffEqNoiseProcess", "ExplicitImports", "Flux", "Hwloc", "InteractiveUtils", "LineSearches", "LuxCUDA", "LuxCore", "LuxLib", "MethodOfLines", "OptimizationOptimJL", "OrdinaryDiffEq", "ReTestItems", "StochasticDiffEq", "TensorBoardLogger", "Test"]
+test = ["Aqua", "CUDA", "DiffEqNoiseProcess", "ExplicitImports", "Flux", "Hwloc", "InteractiveUtils", "LineSearches", "LuxCUDA", "LuxCore", "LuxLib", "MethodOfLines", "OptimizationOptimJL", "OrdinaryDiffEq", "ReTestItems", "StochasticDiffEq", "TensorBoardLogger", "Test"]
\ No newline at end of file
diff --git a/src/BPINN_ode.jl b/src/BPINN_ode.jl
index 39bb0aac72..243d681298 100644
--- a/src/BPINN_ode.jl
+++ b/src/BPINN_ode.jl
@@ -1,16 +1,18 @@
 # HIGH level API for BPINN ODE solver
 
 """
-    BNNODE(chain, Kernel = HMC; strategy = nothing, draw_samples = 2000,
-                        priorsNNw = (0.0, 2.0), param = [nothing], l2std = [0.05],
-                        phystd = [0.05], dataset = [nothing], physdt = 1 / 20.0,
-                        MCMCargs = (n_leapfrog=30), nchains = 1, init_params = nothing,
-                        Adaptorkwargs = (Adaptor = StanHMCAdaptor, targetacceptancerate = 0.8, Metric = DiagEuclideanMetric),
-                        Integratorkwargs = (Integrator = Leapfrog,), autodiff = false,
-                        progress = false, verbose = false)
-
-Algorithm for solving ordinary differential equations using a Bayesian neural network. This is a specialization
-of the physics-informed neural network which is used as a solver for a standard `ODEProblem`.
+    BNNODE(chain, kernel = HMC; strategy = nothing, draw_samples = 2000,
+           priorsNNw = (0.0, 2.0), param = [nothing], l2std = [0.05],
+           phystd = [0.05], dataset = [nothing], physdt = 1 / 20.0,
+           MCMCargs = (; n_leapfrog=30), nchains = 1, init_params = nothing,
+           Adaptorkwargs = (; Adaptor = StanHMCAdaptor, targetacceptancerate = 0.8,
+                              Metric = DiagEuclideanMetric),
+           Integratorkwargs = (Integrator = Leapfrog,), autodiff = false,
+           progress = false, verbose = false)
+
+Algorithm for solving ordinary differential equations using a Bayesian neural network. This
+is a specialization of the physics-informed neural network which is used as a solver for a
+standard `ODEProblem`.
 
 !!! warn
 
@@ -20,10 +22,11 @@ of the physics-informed neural network which is used as a solver for a standard
 
 ## Positional Arguments
 
-* `chain`: A neural network architecture, defined as a `Lux.AbstractExplicitLayer`.
-* `Kernel`: Choice of MCMC Sampling Algorithm. Defaults to `AdvancedHMC.HMC`
+* `chain`: A neural network architecture, defined as a `Lux.AbstractLuxLayer`.
+* `kernel`: Choice of MCMC Sampling Algorithm. Defaults to `AdvancedHMC.HMC`
 
 ## Keyword Arguments
+
 (refer `NeuralPDE.ahmc_bayesian_pinn_ode` keyword arguments.)
 
 ## Example
@@ -44,18 +47,15 @@ dataset = [x̂, time]
 
 chainlux = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh), Lux.Dense(6, 1))
 
-alg = BNNODE(chainlux, draw_samples = 2000,
-                       l2std = [0.05], phystd = [0.05],
-                       priorsNNw = (0.0, 3.0), progress = true)
+alg = BNNODE(chainlux; draw_samples = 2000, l2std = [0.05], phystd = [0.05],
+             priorsNNw = (0.0, 3.0), progress = true)
 
 sol_lux = solve(prob, alg)
 
 # with parameter estimation
-alg = BNNODE(chainlux,dataset = dataset,
-                draw_samples = 2000,l2std = [0.05],
-                phystd = [0.05],priorsNNw = (0.0, 10.0),
-                param = [Normal(6.5, 0.5), Normal(-3, 0.5)],
-                progress = true)
+alg = BNNODE(chainlux; dataset, draw_samples = 2000, l2std = [0.05], phystd = [0.05],
+             priorsNNw = (0.0, 10.0), param = [Normal(6.5, 0.5), Normal(-3, 0.5)],
+             progress = true)
 
 sol_lux_pestim = solve(prob, alg)
 ```
@@ -71,62 +71,49 @@ is an accurate interpolation (up to the neural network training result). In addi
 
 ## References
 
-Liu Yanga, Xuhui Menga, George Em Karniadakis. "B-PINNs: Bayesian Physics-Informed Neural Networks for
-Forward and Inverse PDE Problems with Noisy Data".
+Liu Yanga, Xuhui Menga, George Em Karniadakis. "B-PINNs: Bayesian Physics-Informed Neural
+Networks for Forward and Inverse PDE Problems with Noisy Data".
 
 Kevin Linka, Amelie Schäfer, Xuhui Meng, Zongren Zou, George Em Karniadakis, Ellen Kuhl
 "Bayesian Physics Informed Neural Networks for real-world nonlinear dynamical systems".
 """
-struct BNNODE{C, K, IT <: NamedTuple,
-    A <: NamedTuple, H <: NamedTuple,
-    ST <: Union{Nothing, AbstractTrainingStrategy},
-    I <: Union{Nothing, <:NamedTuple, Vector{<:AbstractFloat}},
-    P <: Union{Nothing, Vector{<:Distribution}},
-    D <:
-    Union{Vector{Nothing}, Vector{<:Vector{<:AbstractFloat}}}} <:
-       NeuralPDEAlgorithm
-    chain::C
-    Kernel::K
-    strategy::ST
-    draw_samples::Int64
+@concrete struct BNNODE <: NeuralPDEAlgorithm
+    chain <: AbstractLuxLayer
+    kernel
+    strategy <: Union{Nothing, AbstractTrainingStrategy}
+    draw_samples::Int
     priorsNNw::Tuple{Float64, Float64}
-    param::P
+    param <: Union{Nothing, Vector{<:Distribution}}
     l2std::Vector{Float64}
     phystd::Vector{Float64}
     phynewstd::Vector{Float64}
-    dataset::D
+    dataset <: Union{Vector{Nothing}, Vector{<:Vector{<:AbstractFloat}}}
     physdt::Float64
-    MCMCkwargs::H
-    nchains::Int64
-    init_params::I
-    Adaptorkwargs::A
-    Integratorkwargs::IT
-    numensemble::Int64
+    MCMCkwargs <: NamedTuple
+    nchains::Int
+    init_params <: Union{Nothing, <:NamedTuple, Vector{<:AbstractFloat}}
+    Adaptorkwargs <: NamedTuple
+    Integratorkwargs <: NamedTuple
+    numensemble::Int
     estim_collocate::Bool
     autodiff::Bool
     progress::Bool
     verbose::Bool
 end
-function BNNODE(chain, Kernel = HMC; strategy = nothing, draw_samples = 2000,
-        priorsNNw = (0.0, 2.0), param = nothing, l2std = [0.05], phystd = [0.05], phynewstd = [0.05],
-        dataset = [nothing], physdt = 1 / 20.0, MCMCkwargs = (n_leapfrog = 30,), nchains = 1,
-        init_params = nothing,
+
+function BNNODE(chain, kernel = HMC; strategy = nothing, draw_samples = 2000,
+        priorsNNw = (0.0, 2.0), param = nothing, l2std = [0.05], phystd = [0.05],
+        phynewstd = [0.05], dataset = [nothing], physdt = 1 / 20.0,
+        MCMCkwargs = (n_leapfrog = 30,), nchains = 1, init_params = nothing,
         Adaptorkwargs = (Adaptor = StanHMCAdaptor,
-            Metric = DiagEuclideanMetric,
-            targetacceptancerate = 0.8),
+            Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
         Integratorkwargs = (Integrator = Leapfrog,),
         numensemble = floor(Int, draw_samples / 3),
-        estim_collocate = false,
-        autodiff = false, progress = false, verbose = false)
-    !(chain isa Lux.AbstractExplicitLayer) &&
-        (chain = adapt(FromFluxAdaptor(false, false), chain))
-    BNNODE(chain, Kernel, strategy,
-        draw_samples, priorsNNw, param, l2std,
-        phystd, phynewstd, dataset, physdt, MCMCkwargs,
-        nchains, init_params,
-        Adaptorkwargs, Integratorkwargs,
-        numensemble, estim_collocate,
-        autodiff, progress, verbose)
+        estim_collocate = false, autodiff = false, progress = false, verbose = false)
+    chain isa AbstractLuxLayer || (chain = FromFluxAdaptor()(chain))
+    return BNNODE(chain, kernel, strategy, draw_samples, priorsNNw, param, l2std, phystd,
+        phynewstd, dataset, physdt, MCMCkwargs, nchains, init_params, Adaptorkwargs,
+        Integratorkwargs, numensemble, estim_collocate, autodiff, progress, verbose)
 end
 
 """
@@ -144,98 +131,59 @@ Contains `ahmc_bayesian_pinn_ode()` function output:
     - step_size
     - nom_step_size
 """
-struct BPINNstats{MC, S, ST}
-    mcmc_chain::MC
-    samples::S
-    statistics::ST
+@concrete struct BPINNstats
+    mcmc_chain
+    samples
+    statistics
 end
 
 """
-BPINN Solution contains the original solution from AdvancedHMC.jl sampling (BPINNstats contains fields related to that).
+BPINN Solution contains the original solution from AdvancedHMC.jl sampling (BPINNstats
+contains fields related to that).
 
-1. `ensemblesol` is the Probabilistic Estimate (MonteCarloMeasurements.jl Particles type) of Ensemble solution from All Neural Network's (made using all sampled parameters) output's.
+1. `ensemblesol` is the Probabilistic Estimate (MonteCarloMeasurements.jl Particles type) of
+   Ensemble solution from All Neural Network's (made using all sampled parameters) output's.
 2. `estimated_nn_params` - Probabilistic Estimate of NN params from sampled weights, biases.
-3. `estimated_de_params` - Probabilistic Estimate of DE params from sampled unknown DE parameters.
+3. `estimated_de_params` - Probabilistic Estimate of DE params from sampled unknown DE
+   parameters.
 """
-struct BPINNsolution{O <: BPINNstats, E, NP, OP, P}
-    original::O
-    ensemblesol::E
-    estimated_nn_params::NP
-    estimated_de_params::OP
-    timepoints::P
-
-    function BPINNsolution(original,
-            ensemblesol,
-            estimated_nn_params,
-            estimated_de_params,
-            timepoints)
-        new{typeof(original), typeof(ensemblesol), typeof(estimated_nn_params),
-            typeof(estimated_de_params), typeof(timepoints)}(
-            original, ensemblesol, estimated_nn_params,
-            estimated_de_params, timepoints)
-    end
+@concrete struct BPINNsolution
+    original <: BPINNstats
+    ensemblesol
+    estimated_nn_params
+    estimated_de_params
+    timepoints
 end
 
-function SciMLBase.__solve(prob::SciMLBase.ODEProblem,
-        alg::BNNODE,
-        args...;
-        dt = nothing,
-        timeseries_errors = true,
-        save_everystep = true,
-        adaptive = false,
-        abstol = 1.0f-6,
-        reltol = 1.0f-3,
-        verbose = false,
-        saveat = 1 / 50.0,
-        maxiters = nothing,)
-    @unpack chain, l2std, phystd, phynewstd, param, priorsNNw, Kernel, strategy,
-    draw_samples, dataset, init_params,
-    nchains, physdt, Adaptorkwargs, Integratorkwargs,
-    MCMCkwargs, numensemble, estim_collocate, autodiff, progress,
-    verbose = alg
+function SciMLBase.__solve(prob::SciMLBase.ODEProblem, alg::BNNODE, args...; dt = nothing,
+        timeseries_errors = true, save_everystep = true, adaptive = false,
+        abstol = 1.0f-6, reltol = 1.0f-3, verbose = false, saveat = 1 / 50.0,
+        maxiters = nothing)
+    (; chain, param, strategy, draw_samples, numensemble, verbose) = alg
 
     # ahmc_bayesian_pinn_ode needs param=[] for easier vcat operation for full vector of parameters
     param = param === nothing ? [] : param
     strategy = strategy === nothing ? GridTraining : strategy
 
-    if draw_samples < 0
-        throw(error("Number of samples to be drawn has to be >=0."))
-    end
+    @assert alg.draw_samples≥0 "Number of samples to be drawn has to be >=0."
 
-    mcmcchain, samples, statistics = ahmc_bayesian_pinn_ode(prob, chain,
-        strategy = strategy, dataset = dataset,
-        draw_samples = draw_samples,
-        init_params = init_params,
-        physdt = physdt, phynewstd = phynewstd,
-        l2std = l2std,
-        phystd = phystd,
-        priorsNNw = priorsNNw,
-        param = param,
-        nchains = nchains,
-        autodiff = autodiff,
-        Kernel = Kernel,
-        Adaptorkwargs = Adaptorkwargs,
-        Integratorkwargs = Integratorkwargs,
-        MCMCkwargs = MCMCkwargs,
-        progress = progress,
-        verbose = verbose,
-        estim_collocate = estim_collocate)
+    mcmcchain, samples, statistics = ahmc_bayesian_pinn_ode(
+        prob, chain; strategy, alg.dataset, alg.draw_samples, alg.init_params,
+        alg.physdt, alg.l2std, alg.phystd, alg.phynewstd, alg.priorsNNw, param, alg.nchains,
+        alg.autodiff, Kernel = alg.kernel, alg.Adaptorkwargs, alg.Integratorkwargs,
+        alg.MCMCkwargs, alg.progress, alg.verbose, alg.estim_collocate)
 
     fullsolution = BPINNstats(mcmcchain, samples, statistics)
     ninv = length(param)
     t = collect(eltype(saveat), prob.tspan[1]:saveat:prob.tspan[2])
 
-    if chain isa Lux.AbstractExplicitLayer
-        θinit, st = Lux.setup(Random.default_rng(), chain)
-        θ = [vector_to_parameters(samples[i][1:(end - ninv)], θinit)
-             for i in 1:max(draw_samples - draw_samples ÷ 10, draw_samples - 1000)]
+    θinit, st = LuxCore.setup(Random.default_rng(), chain)
+    θ = [vector_to_parameters(samples[i][1:(end - ninv)], θinit)
+         for i in 1:max(draw_samples - draw_samples ÷ 10, draw_samples - 1000)]
 
-        luxar = [chain(t', θ[i], st)[1] for i in 1:numensemble]
-        # only need for size
-        θinit = collect(ComponentArrays.ComponentArray(θinit))
-    else
-        throw(error("Only Lux.AbstractExplicitLayer neural networks are supported"))
-    end
+    luxar = [chain(t', θ[i], st)[1] for i in 1:numensemble]
+    # only need for size
+    θinit = collect(ComponentArray(θinit))
 
     # constructing ensemble predictions
     ensemblecurves = Vector{}[]
@@ -278,5 +226,5 @@ function SciMLBase.__solve(prob::SciMLBase.ODEProblem,
                             for i in (nnparams + 1):(nnparams + ninv)]
     end
 
-    BPINNsolution(fullsolution, ensemblecurves, estimnnparams, estimated_params, t)
-end
+    return BPINNsolution(fullsolution, ensemblecurves, estimnnparams, estimated_params, t)
+end
\ No newline at end of file

From be7c3d4e088769ad10da0622c1945ec6fca70676 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Fri, 18 Oct 2024 21:55:11 +0530
Subject: [PATCH 106/107] managing conflicts 2

---
 src/PDE_BPINN.jl        | 185 +++++++-----
 src/advancedHMC_MCMC.jl | 603 +++++++++++++++-------------------------
 src/discretize.jl       |  51 +---
 3 files changed, 362 insertions(+), 477 deletions(-)

diff --git a/src/PDE_BPINN.jl b/src/PDE_BPINN.jl
index 9c283f1e00..4f7e51b3a0 100644
--- a/src/PDE_BPINN.jl
+++ b/src/PDE_BPINN.jl
@@ -4,17 +4,91 @@
     dataset <: Union{Nothing, Vector{<:Matrix{<:Real}}}
     priors <: Vector{<:Distribution}
     allstd::Vector{Vector{Float64}}
+    phynewstd::Vector{Float64}
     names::Tuple
     extraparams::Int
     init_params <: Union{AbstractVector, NamedTuple, ComponentArray}
-    full_loglikelihood
-    Φ
+    full_loglikelihood::Any
+    L2_loss2::Any
+    Φ::Any
 end
 
 function LogDensityProblems.logdensity(ltd::PDELogTargetDensity, θ)
     # for parameter estimation neccesarry to use multioutput case
-    return ltd.full_loglikelihood(setparameters(ltd, θ), ltd.allstd) + priorlogpdf(ltd, θ) +
-           L2LossData(ltd, θ)
+    if Tar.L2_loss2 === nothing
+        return Tar.full_loglikelihood(setparameters(Tar, θ), Tar.allstd) +
+               priorlogpdf(Tar, θ) + L2LossData(Tar, θ)
+    else
+        return Tar.full_loglikelihood(setparameters(Tar, θ), Tar.allstd) +
+               priorlogpdf(Tar, θ) + L2LossData(Tar, θ) +
+               Tar.L2_loss2(setparameters(Tar, θ), Tar.phynewstd)
+    end
+end
+
+# you get a vector of losses
+function get_lossy(pinnrep, dataset, Dict_differentials)
+    eqs = pinnrep.eqs
+    depvars = pinnrep.depvars #depvar order is same as dataset
+
+    # Dict_differentials is filled with Differential operator => diff_i key-value pairs
+    # masking operation
+    eqs_new = substitute.(eqs, Ref(Dict_differentials))
+
+    to_subs, tobe_subs = get_symbols(dataset, depvars, eqs)
+
+    # for values of all depvars at corresponding indvar values in dataset, create dictionaries {Dict(x(t) => 1.0496435863173237, y(t) => 1.9227770685615337)}
+    # In each Dict, num form of depvar is key to its value at certain coords of indvars, n_dicts = n_rows_dataset(or n_indvar_coords_dataset)
+    eq_subs = [Dict(tobe_subs[depvar] => to_subs[depvar][i] for depvar in depvars)
+               for i in 1:size(dataset[1][:, 1])[1]]
+
+    # for each dataset point(eq_sub dictionary), substitute in masked equations
+    # n_collocated_equations = n_rows_dataset(or n_indvar_coords_dataset)
+    masked_colloc_equations = [[substitute(eq, eq_sub) for eq in eqs_new]
+                               for eq_sub in eq_subs]
+    # now we have vector of dataset depvar's collocated equations
+
+    # reverse dict for re-substituting values of Differential(t)(u(t)) etc
+    rev_Dict_differentials = Dict(value => key for (key, value) in Dict_differentials)
+
+    # unmask Differential terms in masked_colloc_equations
+    colloc_equations = [substitute.(masked_colloc_equation, Ref(rev_Dict_differentials))
+                        for masked_colloc_equation in masked_colloc_equations]
+
+    # nested vector of datafree_pde_loss_functions (as in discretize.jl)
+    # each sub vector has dataset's indvar coord's datafree_colloc_loss_function, n_subvectors = n_rows_dataset(or n_indvar_coords_dataset)
+    # zip each colloc equation with args for each build_loss call per equation vector
+    datafree_colloc_loss_functions = [[build_loss_function(pinnrep, eq, pde_indvar)
+                                       for (eq, pde_indvar, integration_indvar) in zip(
+                                          colloc_equation,
+                                          pinnrep.pde_indvars,
+                                          pinnrep.pde_integration_vars)]
+                                      for colloc_equation in colloc_equations]
+
+    return datafree_colloc_loss_functions
+end
+
+function get_symbols(dataset, depvars, eqs)
+    # take only values of depvars from dataset
+    depvar_vals = [dataset_i[:, 1] for dataset_i in dataset]
+    # order of pinnrep.depvars, depvar_vals, BayesianPINN.dataset must be same
+    to_subs = Dict(depvars .=> depvar_vals)
+
+    numform_vars = Symbolics.get_variables.(eqs)
+    Eq_vars = unique(reduce(vcat, numform_vars))
+    # got equation's depvar num format {x(t)} for use in substitute()
+
+    tobe_subs = Dict()
+    for a in depvars
+        for i in Eq_vars
+            expr = toexpr(i)
+            if (expr isa Expr) && (expr.args[1] == a)
+                tobe_subs[a] = i
+            end
+        end
+    end
+    # depvar symbolic and num format got, tobe_subs : Dict{Any, Any}(:y => y(t), :x => x(t))
+
+    return to_subs, tobe_subs
 end
 
 @views function setparameters(ltd::PDELogTargetDensity, θ)
@@ -55,8 +129,6 @@ function L2LossData(ltd::PDELogTargetDensity, θ)
 
     # dataset of form Vector[matrix_x, matrix_y, matrix_z]
     # matrix_i is of form [i,indvar1,indvar2,..] (needed in case if heterogenous domains)
-    # note that indvar1,indvar2.. cols can be different values for different depvar matrices
-    # dataset,phi order follows pinnrep.depvars orders of variables (order of declaration in @variables macro)
 
     # Phi is the trial solution for each NN in chain array
     # Creating logpdf( MvNormal(Phi(t,θ),std), dataset[i] )
@@ -90,6 +162,8 @@ function priorlogpdf(ltd::PDELogTargetDensity, θ)
     invlogpdf = sum((length(θ) - ltd.extraparams + 1):length(θ)) do i
         logpdf(invpriors[length(θ) - i + 1], θ[i])
     end
+
+    return invlogpdf + logpdf(nnwparams, θ[1:(length(θ) - ltd.extraparams)])
 end
 
 function integratorchoice(Integratorkwargs, initial_ϵ)
@@ -177,27 +251,6 @@ function inference(samples, pinnrep, saveats, numensemble, ℓπ)
     return ensemblecurves, estimatedLuxparams, estimated_params, timepoints
 end
 
-function integratorchoice(Integratorkwargs, initial_ϵ)
-    Integrator = Integratorkwargs[:Integrator]
-    if Integrator == JitteredLeapfrog
-        jitter_rate = Integratorkwargs[:jitter_rate]
-        Integrator(initial_ϵ, jitter_rate)
-    elseif Integrator == TemperedLeapfrog
-        tempering_rate = Integratorkwargs[:tempering_rate]
-        Integrator(initial_ϵ, tempering_rate)
-    else
-        Integrator(initial_ϵ)
-    end
-end
-
-function adaptorchoice(Adaptor, mma, ssa)
-    if Adaptor != AdvancedHMC.NoAdaptation()
-        Adaptor(mma, ssa)
-    else
-        AdvancedHMC.NoAdaptation()
-    end
-end
-
 """
     ahmc_bayesian_pinn_pde(pde_system, discretization;
         draw_samples = 1000, bcstd = [0.01], l2std = [0.05], phystd = [0.05],
@@ -255,15 +308,12 @@ end
     releases.
 """
 function ahmc_bayesian_pinn_pde(pde_system, discretization;
-        draw_samples = 1000,
-        bcstd = [0.01], l2std = [0.05],
-        phystd = [0.05], phystdnew = [0.05], priorsNNw = (0.0, 2.0),
-        param = [], nchains = 1, Kernel = HMC(0.1, 30),
-        Adaptorkwargs = (Adaptor = StanHMCAdaptor,
+        draw_samples = 1000, bcstd = [0.01], l2std = [0.05], phystd = [0.05],
+        phynewstd = [0.05], priorsNNw = (0.0, 2.0), param = [], nchains = 1,
+        Kernel = HMC(0.1, 30), Adaptorkwargs = (Adaptor = StanHMCAdaptor,
             Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
         Integratorkwargs = (Integrator = Leapfrog,), saveats = [1 / 10.0],
-        numensemble = floor(Int, draw_samples / 3), Dict_differentials = nothing,
-        progress = false, verbose = false)
+        numensemble = floor(Int, draw_samples / 3), progress = false, verbose = false)
     pinnrep = symbolic_discretize(pde_system, discretization)
     dataset_pde, dataset_bc = discretization.dataset
 
@@ -275,31 +325,31 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
         # add case for if parameters present in bcs?
 
         train_sets_pde = get_dataset_train_points(pde_system.eqs,
-                dataset_pde,
-                pinnrep)
-        colloc_train_sets = [[hcat(train_sets_pde[i][:, j]...)' for i in eachindex(datafree_colloc_loss_functions[1])] for j in eachindex(datafree_colloc_loss_functions)]
+            dataset_pde,
+            pinnrep)
+        colloc_train_sets = [[hcat(train_sets_pde[i][:, j]...)'
+                              for i in eachindex(datafree_colloc_loss_functions[1])]
+                             for j in eachindex(datafree_colloc_loss_functions)]
 
         # for each datafree_colloc_loss_function create loss_functions by passing dataset's indvar coords as train_sets_pde.
         # placeholder strategy = GridTraining(0.1), datafree_bc_loss_function and train_sets_bc must be nothing
         # order of indvar coords will be same as corresponding depvar coords values in dataset provided in get_lossy() call.
         pde_loss_function_points = [merge_strategy_with_loglikelihood_function(
-            pinnrep,
-            GridTraining(0.1),
-            datafree_colloc_loss_functions[i],
-            nothing;
-            train_sets_pde = colloc_train_sets[i],
-            train_sets_bc = nothing)[1]
-                              for i in eachindex(datafree_colloc_loss_functions)]
-
-        function L2_loss2(θ, allstd)
-            stdpdesnew = allstd[4]
-
+                                        pinnrep,
+                                        GridTraining(0.1),
+                                        datafree_colloc_loss_functions[i],
+                                        nothing;
+                                        train_sets_pde = colloc_train_sets[i],
+                                        train_sets_bc = nothing)[1]
+                                    for i in eachindex(datafree_colloc_loss_functions)]
+
+        function L2_loss2(θ, phynewstd)
             # first vector of losses,from tuple -> pde losses, first[1] pde loss
-            pde_loglikelihoods = [sum([pde_loss_function(θ, stdpdesnew[i])
+            pde_loglikelihoods = [sum([pde_loss_function(θ, phynewstd[i])
                                        for (i, pde_loss_function) in enumerate(pde_loss_functions)])
                                   for pde_loss_functions in pde_loss_function_points]
 
-            # bc_loglikelihoods = [sum([bc_loss_function(θ, stdpdesnew[i]) for (i, bc_loss_function) in enumerate(pde_loss_function_points[1])]) for pde_loss_function_points in pde_loss_functions]
+            # bc_loglikelihoods = [sum([bc_loss_function(θ, phynewstd[i]) for (i, bc_loss_function) in enumerate(pde_loss_function_points[1])]) for pde_loss_function_points in pde_loss_functions]
             #                      for (j, bc_loss_function) in enumerate(bc_loss_functions)]
 
             return sum(pde_loglikelihoods)
@@ -368,18 +418,10 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
     # vector in case of N-dimensional domains
     strategy = discretization.strategy
 
-    # dimensions would be total no of params,initial_nnθ for Lux namedTuples 
-    ℓπ = PDELogTargetDensity(nparameters,
-        strategy,
-        dataset,
-        priors,
-        [phystd, bcstd, l2std, phystdnew],
-        names,
-        ninv,
-        initial_nnθ,
-        full_weighted_loglikelihood,
-        newloss,
-        Φ)
+    # dimensions would be total no of params,initial_nnθ for Lux namedTuples
+    ℓπ = PDELogTargetDensity(
+        nparameters, strategy, dataset, priors, [phystd, bcstd, l2std], phynewstd,
+        names, ninv, initial_nnθ, full_weighted_loglikelihood, newloss, Φ)
 
     Adaptor, Metric, targetacceptancerate = Adaptorkwargs[:Adaptor],
     Adaptorkwargs[:Metric], Adaptorkwargs[:targetacceptancerate]
@@ -394,10 +436,16 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
         @printf("Current Prior Log-likelihood : %g\n", priorlogpdf(ℓπ, initial_θ))
         @printf("Current MSE against dataset Log-likelihood : %g\n",
             L2LossData(ℓπ, initial_θ))
+        if !(newloss isa Nothing)
+            @printf("Current new loss : %g\n",
+                ℓπ.L2_loss2(setparameters(ℓπ, initial_θ),
+                    ℓπ.phynewstd))
+        end
     end
 
     # parallel sampling option
     if nchains != 1
+
         # Cache to store the chains
         bpinnsols = Vector{Any}(undef, nchains)
 
@@ -441,11 +489,16 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
 
         if verbose
             @printf("Sampling Complete.\n")
-            @printf("Current Physics Log-likelihood : %g\n",
+            @printf("Final Physics Log-likelihood : %g\n",
                 ℓπ.full_loglikelihood(setparameters(ℓπ, samples[end]), ℓπ.allstd))
-            @printf("Current Prior Log-likelihood : %g\n", priorlogpdf(ℓπ, samples[end]))
-            @printf("Current MSE against dataset Log-likelihood : %g\n",
+            @printf("Final Prior Log-likelihood : %g\n", priorlogpdf(ℓπ, samples[end]))
+            @printf("Final MSE against dataset Log-likelihood : %g\n",
                 L2LossData(ℓπ, samples[end]))
+            if !(newloss isa Nothing)
+                @printf("Final L2_LOSSY : %g\n",
+                    ℓπ.L2_loss2(setparameters(ℓπ, samples[end]),
+                        ℓπ.phynewstd))
+            end
         end
 
         fullsolution = BPINNstats(mcmc_chain, samples, stats)
@@ -455,4 +508,4 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
         return BPINNsolution(
             fullsolution, ensemblecurves, estimnnparams, estimated_params, timepoints)
     end
-end
+end
\ No newline at end of file
diff --git a/src/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl
index 8b996fce5c..5ac4213c92 100644
--- a/src/advancedHMC_MCMC.jl
+++ b/src/advancedHMC_MCMC.jl
@@ -1,72 +1,42 @@
-mutable struct LogTargetDensity{C, S, ST <: AbstractTrainingStrategy, I,
-    P <: Vector{<:Distribution},
-    D <:
-    Union{Vector{Nothing}, Vector{<:Vector{<:AbstractFloat}}}
-}
+@concrete struct LogTargetDensity
     dim::Int
-    prob::SciMLBase.ODEProblem
-    chain::C
-    st::S
-    strategy::ST
-    dataset::D
-    priors::P
+    prob <: SciMLBase.ODEProblem
+    smodel <: StatefulLuxLayer
+    strategy <: AbstractTrainingStrategy
+    dataset <: Union{Vector{Nothing}, Vector{<:Vector{<:AbstractFloat}}}
+    priors <: Vector{<:Distribution}
     phystd::Vector{Float64}
     phynewstd::Vector{Float64}
     l2std::Vector{Float64}
     autodiff::Bool
     physdt::Float64
     extraparams::Int
-    init_params::I
+    init_params <: Union{NamedTuple, ComponentArray}
     estim_collocate::Bool
+end
 
-    function LogTargetDensity(dim, prob, chain::Optimisers.Restructure, st, strategy,
-            dataset,
-            priors, phystd, phynewstd, l2std, autodiff, physdt, extraparams,
-            init_params::AbstractVector, estim_collocate)
-        new{
-            typeof(chain),
-            Nothing,
-            typeof(strategy),
-            typeof(init_params),
-            typeof(priors),
-            typeof(dataset)
-        }(dim,
-            prob,
-            chain,
-            nothing, strategy,
-            dataset,
-            priors,
-            phystd,
-            phynewstd,
-            l2std,
-            autodiff,
-            physdt,
-            extraparams,
-            init_params,
-            estim_collocate)
-    end
-    function LogTargetDensity(dim, prob, chain::Lux.AbstractExplicitLayer, st, strategy,
-            dataset,
-            priors, phystd, phynewstd, l2std, autodiff, physdt, extraparams,
-            init_params::NamedTuple, estim_collocate)
-        new{
-            typeof(chain),
-            typeof(st),
-            typeof(strategy),
-            typeof(init_params),
-            typeof(priors),
-            typeof(dataset)
-        }(dim,
-            prob,
-            chain, st, strategy,
-            dataset, priors,
-            phystd, phynewstd,
-             l2std,
-            autodiff,
-            physdt,
-            extraparams,
-            init_params,
-            estim_collocate)
+"""
+NN OUTPUT AT t,θ ~ phi(t,θ).
+"""
+function (f::LogTargetDensity)(t::AbstractVector, θ)
+    θ = vector_to_parameters(θ, f.init_params)
+    dev = safe_get_device(θ)
+    t = safe_expand(dev, t)
+    u0 = f.prob.u0 |> dev
+    return u0 .+ (t' .- f.prob.tspan[1]) .* f.smodel(t', θ)
+end
+
+(f::LogTargetDensity)(t::Number, θ) = f([t], θ)[:, 1]
+
+"""
+Similar to ode_dfdx() in NNODE.
+"""
+function ode_dfdx(phi::LogTargetDensity, t::AbstractVector, θ, autodiff::Bool)
+    if autodiff
+        return ForwardDiff.jacobian(Base.Fix2(phi, θ), t)
+    else
+        ϵ = sqrt(eps(eltype(t)))
+        return (phi(t .+ ϵ, θ) .- phi(t, θ)) ./ ϵ
     end
 end
 
@@ -74,344 +44,239 @@ end
 Function needed for converting vector of sampled parameters into ComponentVector in case of Lux chain output, derivatives
 the sampled parameters are of exotic type `Dual` due to ForwardDiff's autodiff tagging.
 """
-function vector_to_parameters(ps_new::AbstractVector,
-        ps::Union{NamedTuple, ComponentArrays.ComponentVector})
-    @assert length(ps_new) == Lux.parameterlength(ps)
+function vector_to_parameters(ps_new::AbstractVector, ps::Union{NamedTuple, ComponentArray})
+    @assert length(ps_new) == LuxCore.parameterlength(ps)
     i = 1
     function get_ps(x)
         z = reshape(view(ps_new, i:(i + length(x) - 1)), size(x))
         i += length(x)
         return z
     end
-    return Functors.fmap(get_ps, ps)
+    return fmap(get_ps, ps)
 end
 
-vector_to_parameters(ps_new::AbstractVector, ps::AbstractVector) = ps_new
+vector_to_parameters(ps_new::AbstractVector, _::AbstractVector) = ps_new
 
-function LogDensityProblems.logdensity(Tar::LogTargetDensity, θ)
-    if Tar.estim_collocate
-        return physloglikelihood(Tar, θ) + priorweights(Tar, θ) + L2LossData(Tar, θ) +
-               L2loss2(Tar, θ)
-    else
-        return physloglikelihood(Tar, θ) + priorweights(Tar, θ) + L2LossData(Tar, θ)
-    end
+function LogDensityProblems.logdensity(ltd::LogTargetDensity, θ)
+    ldensity = physloglikelihood(ltd, θ) + priorweights(ltd, θ) + L2LossData(ltd, θ)
+    ltd.estim_collocate && return ldensity + L2loss2(ltd, θ)
+    return ldensity
 end
 
-LogDensityProblems.dimension(Tar::LogTargetDensity) = Tar.dim
+LogDensityProblems.dimension(ltd::LogTargetDensity) = ltd.dim
 
 function LogDensityProblems.capabilities(::LogTargetDensity)
-    LogDensityProblems.LogDensityOrder{1}()
+    return LogDensityProblems.LogDensityOrder{1}()
 end
 
 """
 suggested extra loss function for ODE solver case
 """
-function L2loss2(Tar::LogTargetDensity, θ)
-    f = Tar.prob.f
+@views function L2loss2(ltd::LogTargetDensity, θ)
+    ltd.extraparams ≤ 0 && return false  # XXX: type-stability?
 
-    # parameter estimation chosen or not
-    if Tar.extraparams > 0
-        autodiff = Tar.autodiff
-        # Timepoints to enforce Physics 
-        t = Tar.dataset[end]
-        u1 = Tar.dataset[2]
-        û = Tar.dataset[1]
-
-        nnsol = NNodederi(Tar, t, θ[1:(length(θ) - Tar.extraparams)], autodiff)
-
-        ode_params = Tar.extraparams == 1 ?
-                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
-                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
-
-        if length(Tar.prob.u0) == 1
-            physsol = [f(û[i],
-                ode_params,
-                t[i])
-                       for i in 1:length(û[:, 1])]
-        else
-            physsol = [f([û[i], u1[i]],
-                ode_params,
-                t[i])
-                       for i in 1:length(û)]
-        end
-        #form of NN output matrix output dim x n 
-        deri_physsol = reduce(hcat, physsol)
-   
-        physlogprob = 0
-        for i in 1:length(Tar.prob.u0)
-            # can add phystdnew[i] for u[i] 
-            physlogprob += logpdf(MvNormal(deri_physsol[i, :],
-                    LinearAlgebra.Diagonal(map(abs2,
-                        (Tar.phynewstd[i]) .*
-                        ones(length(nnsol[i, :]))))),
-                nnsol[i, :])
-        end
-        return physlogprob
+    f = ltd.prob.f
+    t = ltd.dataset[end]
+    u1 = ltd.dataset[2]
+    û = ltd.dataset[1]
+
+    nnsol = ode_dfdx(ltd, t, θ[1:(length(θ) - ltd.extraparams)], ltd.autodiff)
+
+    ode_params = ltd.extraparams == 1 ? θ[((length(θ) - ltd.extraparams) + 1)] :
+                 θ[((length(θ) - ltd.extraparams) + 1):length(θ)]
+
+    physsol = if length(ltd.prob.u0) == 1
+        [f(û[i], ode_params, tᵢ) for (i, tᵢ) in enumerate(t)]
     else
-        return 0
+        [f([û[i], u1[i]], ode_params, tᵢ) for (i, tᵢ) in enumerate(t)]
+    end
+    # form of NN output matrix output dim x n
+    deri_physsol = reduce(hcat, physsol)
+    T = promote_type(eltype(deri_physsol), eltype(nnsol))
+
+    physlogprob = T(0)
+    for i in 1:length(ltd.prob.u0)
+        physlogprob += logpdf(
+            MvNormal(deri_physsol[i, :],
+                Diagonal(abs2.(T(ltd.phynewstd[i]) .* ones(T, length(nnsol[i, :]))))),
+            nnsol[i, :]
+        )
     end
+    return physlogprob
 end
 
 """
 L2 loss loglikelihood(needed for ODE parameter estimation).
 """
-function L2LossData(Tar::LogTargetDensity, θ)
-    # check if dataset is provided
-    if Tar.dataset isa Vector{Nothing} || Tar.extraparams == 0
-        return 0
-    else
-        # matrix(each row corresponds to vector u's rows)
-        nn = Tar(Tar.dataset[end], θ[1:(length(θ) - Tar.extraparams)])
-
-        L2logprob = 0
-        for i in 1:length(Tar.prob.u0)
-            # for u[i] ith vector must be added to dataset, nn[1,:] is the dx in lotka_volterra
-            L2logprob += logpdf(
-                MvNormal(nn[i, :],
-                    LinearAlgebra.Diagonal(abs2.(Tar.l2std[i] .*
-                                                 ones(length(Tar.dataset[i]))))),
-                Tar.dataset[i])
-        end
-        return L2logprob
+@views function L2LossData(ltd::LogTargetDensity, θ)
+    (ltd.dataset isa Vector{Nothing} || ltd.extraparams == 0) && return 0
+
+    # matrix(each row corresponds to vector u's rows)
+    nn = ltd(ltd.dataset[end], θ[1:(length(θ) - ltd.extraparams)])
+    T = eltype(nn)
+
+    L2logprob = zero(T)
+    for i in 1:length(ltd.prob.u0)
+        # for u[i] ith vector must be added to dataset,nn[1, :] is the dx in lotka_volterra
+        L2logprob += logpdf(
+            MvNormal(
+                nn[i, :],
+                Diagonal(abs2.(T(ltd.l2std[i]) .* ones(T, length(ltd.dataset[i]))))
+            ),
+            ltd.dataset[i]
+        )
     end
+    return L2logprob
 end
 
 """
 Physics loglikelihood over problem timespan + dataset timepoints.
 """
-function physloglikelihood(Tar::LogTargetDensity, θ)
-    f = Tar.prob.f
-    p = Tar.prob.p
-    tspan = Tar.prob.tspan
-    autodiff = Tar.autodiff
-    strategy = Tar.strategy
+function physloglikelihood(ltd::LogTargetDensity, θ)
+    (; f, p, tspan) = ltd.prob
+    (; autodiff, strategy) = ltd
 
     # parameter estimation chosen or not
-    if Tar.extraparams > 0
-        ode_params = Tar.extraparams == 1 ?
-                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] :
-                     θ[((length(θ) - Tar.extraparams) + 1):length(θ)]
+    if ltd.extraparams > 0
+        ode_params = ltd.extraparams == 1 ? θ[((length(θ) - ltd.extraparams) + 1)] :
+                     θ[((length(θ) - ltd.extraparams) + 1):length(θ)]
     else
-        ode_params = p == SciMLBase.NullParameters() ? [] : p
+        ode_params = p isa SciMLBase.NullParameters ? Float64[] : p
     end
 
-    return getlogpdf(strategy, Tar, f, autodiff, tspan, ode_params, θ)
+    return getlogpdf(strategy, ltd, f, autodiff, tspan, ode_params, θ)
 end
 
-function getlogpdf(strategy::GridTraining, Tar::LogTargetDensity, f, autodiff::Bool,
-        tspan,
-        ode_params, θ)
-    if Tar.dataset isa Vector{Nothing}
-        t = collect(eltype(strategy.dx), tspan[1]:(strategy.dx):tspan[2])
-    else
-        t = vcat(collect(eltype(strategy.dx), tspan[1]:(strategy.dx):tspan[2]),
-            Tar.dataset[end])
-    end
-
-    sum(innerdiff(Tar, f, autodiff, t, θ,
-        ode_params))
+function getlogpdf(strategy::GridTraining, ltd::LogTargetDensity, f, autodiff::Bool,
+        tspan, ode_params, θ)
+    ts = collect(eltype(strategy.dx), tspan[1]:(strategy.dx):tspan[2])
+    t = ltd.dataset isa Vector{Nothing} ? ts : vcat(ts, ltd.dataset[end])
+    return sum(innerdiff(ltd, f, autodiff, t, θ, ode_params))
 end
 
-function getlogpdf(strategy::StochasticTraining,
-        Tar::LogTargetDensity,
-        f,
-        autodiff::Bool,
-        tspan,
-        ode_params,
-        θ)
-    if Tar.dataset isa Vector{Nothing}
-        t = [(tspan[2] - tspan[1]) * rand() + tspan[1] for i in 1:(strategy.points)]
-    else
-        t = vcat([(tspan[2] - tspan[1]) * rand() + tspan[1] for i in 1:(strategy.points)],
-            Tar.dataset[end])
-    end
-
-    sum(innerdiff(Tar, f, autodiff, t, θ,
-        ode_params))
+function getlogpdf(strategy::StochasticTraining, ltd::LogTargetDensity,
+        f, autodiff::Bool, tspan, ode_params, θ)
+    T = promote_type(eltype(tspan[1]), eltype(tspan[2]))
+    samples = (tspan[2] - tspan[1]) .* rand(T, strategy.points) .+ tspan[1]
+    t = ltd.dataset isa Vector{Nothing} ? samples : vcat(samples, ltd.dataset[end])
+    return sum(innerdiff(ltd, f, autodiff, t, θ, ode_params))
 end
 
-function getlogpdf(strategy::QuadratureTraining, Tar::LogTargetDensity, f,
-        autodiff::Bool,
-        tspan,
-        ode_params, θ)
-    function integrand(t::Number, θ)
-        innerdiff(Tar, f, autodiff, [t], θ, ode_params)
-    end
+function getlogpdf(strategy::QuadratureTraining, ltd::LogTargetDensity, f, autodiff::Bool,
+        tspan, ode_params, θ)
+    integrand(t::Number, θ) = innerdiff(ltd, f, autodiff, [t], θ, ode_params)
     intprob = IntegralProblem(
-        integrand, (tspan[1], tspan[2]), θ; nout = length(Tar.prob.u0))
-    sol = solve(intprob, QuadGKJL(); abstol = strategy.abstol, reltol = strategy.reltol)
-    sum(sol.u)
+        integrand, (tspan[1], tspan[2]), θ; nout = length(ltd.prob.u0))
+    sol = solve(intprob, QuadGKJL(); strategy.abstol, strategy.reltol)
+    return sum(sol.u)
 end
 
-function getlogpdf(strategy::WeightedIntervalTraining, Tar::LogTargetDensity, f,
-        autodiff::Bool,
-        tspan,
-        ode_params, θ)
-    minT = tspan[1]
-    maxT = tspan[2]
-
+function getlogpdf(strategy::WeightedIntervalTraining, ltd::LogTargetDensity, f,
+        autodiff::Bool, tspan, ode_params, θ)
+    minT, maxT = tspan
     weights = strategy.weights ./ sum(strategy.weights)
-
     N = length(weights)
-    points = strategy.points
-
     difference = (maxT - minT) / N
 
-    data = Float64[]
+    ts = eltype(difference)[]
     for (index, item) in enumerate(weights)
-        temp_data = rand(1, trunc(Int, points * item)) .* difference .+ minT .+
+        temp_data = rand(1, trunc(Int, strategy.points * item)) .* difference .+ minT .+
                     ((index - 1) * difference)
-        data = append!(data, temp_data)
+        append!(ts, temp_data)
     end
 
-    if Tar.dataset isa Vector{Nothing}
-        t = data
-    else
-        t = vcat(data,
-            Tar.dataset[end])
-    end
-
-    sum(innerdiff(Tar, f, autodiff, t, θ,
-        ode_params))
+    t = ltd.dataset isa Vector{Nothing} ? ts : vcat(ts, ltd.dataset[end])
+    return sum(innerdiff(ltd, f, autodiff, t, θ, ode_params))
 end
 
 """
 MvNormal likelihood at each `ti` in time `t` for ODE collocation residue with NN with parameters θ.
 """
-function innerdiff(Tar::LogTargetDensity, f, autodiff::Bool, t::AbstractVector, θ,
+@views function innerdiff(ltd::LogTargetDensity, f, autodiff::Bool, t::AbstractVector, θ,
         ode_params)
+    # ltd used for phi and LogTargetDensity object attributes access
+    out = ltd(t, θ[1:(length(θ) - ltd.extraparams)])
 
-    # Tar used for phi and LogTargetDensity object attributes access
-    out = Tar(t, θ[1:(length(θ) - Tar.extraparams)])
-
-    # # reject samples case(write clear reason why)
-    if any(isinf, out[:, 1]) || any(isinf, ode_params)
-        return -Inf
-    end
+    # reject samples case(write clear reason why)
+    (any(isinf, out[:, 1]) || any(isinf, ode_params)) && return convert(eltype(out), -Inf)
 
     # this is a vector{vector{dx,dy}}(handle case single u(float passed))
     if length(out[:, 1]) == 1
-        physsol = [f(out[:, i][1],
-                       ode_params,
-                       t[i])
-                   for i in 1:length(out[1, :])]
+        physsol = [f(out[:, i][1], ode_params, t[i]) for i in 1:length(out[1, :])]
     else
-        physsol = [f(out[:, i],
-                       ode_params,
-                       t[i])
-                   for i in 1:length(out[1, :])]
+        physsol = [f(out[:, i], ode_params, t[i]) for i in 1:length(out[1, :])]
     end
     physsol = reduce(hcat, physsol)
 
-    nnsol = NNodederi(Tar, t, θ[1:(length(θ) - Tar.extraparams)], autodiff)
+    nnsol = ode_dfdx(ltd, t, θ[1:(length(θ) - ltd.extraparams)], autodiff)
 
     vals = nnsol .- physsol
+    T = eltype(vals)
 
-    # N dimensional vector if N outputs for NN(each row has logpdf of u[i] where u is vector of dependant variables)
+    # N dimensional vector if N outputs for NN(each row has logpdf of u[i] where u is vector
+    # of dependant variables)
     return [logpdf(
                 MvNormal(vals[i, :],
-                    LinearAlgebra.Diagonal(abs2.(Tar.phystd[i] .*
-                                                 ones(length(vals[i, :]))))),
-                zeros(length(vals[i, :]))) for i in 1:length(Tar.prob.u0)]
+                    Diagonal(abs2.(T(ltd.phystd[i]) .* ones(T, length(vals[i, :]))))),
+                zeros(T, length(vals[i, :]))
+            ) for i in 1:length(ltd.prob.u0)]
 end
 
 """
 Prior logpdf for NN parameters + ODE constants.
 """
-function priorweights(Tar::LogTargetDensity, θ)
-    allparams = Tar.priors
-    # nn weights
-    nnwparams = allparams[1]
-
-    if Tar.extraparams > 0
-        # Vector of ode parameters priors
-        invpriors = allparams[2:end]
-
-        invlogpdf = sum(
-            logpdf(invpriors[length(θ) - i + 1], θ[i])
-            for i in (length(θ) - Tar.extraparams + 1):length(θ);
-            init = 0.0)
-
-        return (invlogpdf
-                +
-                logpdf(nnwparams, θ[1:(length(θ) - Tar.extraparams)]))
-    else
-        return logpdf(nnwparams, θ)
-    end
-end
+@views function priorweights(ltd::LogTargetDensity, θ)
+    allparams = ltd.priors
+    nnwparams = allparams[1] # nn weights
 
-function generate_Tar(chain::Lux.AbstractExplicitLayer, init_params)
-    θ, st = Lux.setup(Random.default_rng(), chain)
-    return init_params, chain, st
-end
+    ltd.extraparams ≤ 0 && return logpdf(nnwparams, θ)
 
-function generate_Tar(chain::Lux.AbstractExplicitLayer, init_params::Nothing)
-    θ, st = Lux.setup(Random.default_rng(), chain)
-    return θ, chain, st
-end
+    # Vector of ode parameters priors
+    invpriors = allparams[2:end]
 
-"""
-NN OUTPUT AT t,θ ~ phi(t,θ).
-"""
-function (f::LogTargetDensity{C, S})(t::AbstractVector,
-        θ) where {C <: Lux.AbstractExplicitLayer, S}
-    θ = vector_to_parameters(θ, f.init_params)
-    y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), t'), θ, f.st)
-    ChainRulesCore.@ignore_derivatives f.st = st
-    f.prob.u0 .+ (t' .- f.prob.tspan[1]) .* y
+    invlogpdf = sum(
+        logpdf(invpriors[length(θ) - i + 1], θ[i])
+    for i in (length(θ) - ltd.extraparams + 1):length(θ))
+
+    return invlogpdf + logpdf(nnwparams, θ[1:(length(θ) - ltd.extraparams)])
 end
 
-function (f::LogTargetDensity{C, S})(t::Number,
-        θ) where {C <: Lux.AbstractExplicitLayer, S}
-    θ = vector_to_parameters(θ, f.init_params)
-    y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), [t]), θ, f.st)
-    ChainRulesCore.@ignore_derivatives f.st = st
-    f.prob.u0 .+ (t .- f.prob.tspan[1]) .* y
+function generate_ltd(chain::AbstractLuxLayer, init_params)
+    return init_params, chain, LuxCore.initialstates(Random.default_rng(), chain)
 end
 
-"""
-Similar to ode_dfdx() in NNODE.
-"""
-function NNodederi(phi::LogTargetDensity, t::AbstractVector, θ, autodiff::Bool)
-    if autodiff
-        hcat(ForwardDiff.derivative.(ti -> phi(ti, θ), t)...)
-    else
-        (phi(t .+ sqrt(eps(eltype(t))), θ) - phi(t, θ)) ./ sqrt(eps(eltype(t)))
-    end
+function generate_ltd(chain::AbstractLuxLayer, ::Nothing)
+    θ, st = LuxCore.setup(Random.default_rng(), chain)
+    return θ, chain, st
 end
 
 function kernelchoice(Kernel, MCMCkwargs)
     if Kernel == HMCDA
-        δ, λ = MCMCkwargs[:δ], MCMCkwargs[:λ]
-        Kernel(δ, λ)
+        Kernel(MCMCkwargs[:δ], MCMCkwargs[:λ])
     elseif Kernel == NUTS
         δ, max_depth, Δ_max = MCMCkwargs[:δ], MCMCkwargs[:max_depth], MCMCkwargs[:Δ_max]
-        Kernel(δ, max_depth = max_depth, Δ_max = Δ_max)
-    else
-        # HMC
-        n_leapfrog = MCMCkwargs[:n_leapfrog]
-        Kernel(n_leapfrog)
+        Kernel(δ; max_depth, Δ_max)
+    else # HMC
+        Kernel(MCMCkwargs[:n_leapfrog])
     end
 end
 
 """
-    ahmc_bayesian_pinn_ode(prob, chain; strategy = GridTraining,
-                        dataset = [nothing],init_params = nothing,
-                        draw_samples = 1000, physdt = 1 / 20.0f0,l2std = [0.05],
-                        phystd = [0.05], phynewstd = [0.05], priorsNNw = (0.0, 2.0),
-                        param = [], nchains = 1, autodiff = false, Kernel = HMC,
-                        Adaptorkwargs = (Adaptor = StanHMCAdaptor,
-                                         Metric = DiagEuclideanMetric,
-                                         targetacceptancerate = 0.8),
-                        Integratorkwargs = (Integrator = Leapfrog,),
-                        MCMCkwargs = (n_leapfrog = 30,),
-                        progress = false, verbose = false)
+    ahmc_bayesian_pinn_ode(prob, chain; strategy = GridTraining, dataset = [nothing],
+                           init_params = nothing, draw_samples = 1000, physdt = 1 / 20.0f0,
+                           l2std = [0.05], phystd = [0.05], phynewstd = [0.05], priorsNNw = (0.0, 2.0),
+                           param = [], nchains = 1, autodiff = false, Kernel = HMC,
+                           Adaptorkwargs = (Adaptor = StanHMCAdaptor,
+                               Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
+                           Integratorkwargs = (Integrator = Leapfrog,),
+                           MCMCkwargs = (n_leapfrog = 30,), progress = false,
+                           verbose = false)
 
 !!! warn
 
-    Note that `ahmc_bayesian_pinn_ode()` only supports ODEs which are written in the out-of-place form, i.e.
-    `du = f(u,p,t)`, and not `f(du,u,p,t)`. If not declared out-of-place, then the `ahmc_bayesian_pinn_ode()`
-    will exit with an error.
+    Note that `ahmc_bayesian_pinn_ode()` only supports ODEs which are written in the
+    out-of-place form, i.e. `du = f(u,p,t)`, and not `f(du,u,p,t)`. If not declared
+    out-of-place, then `ahmc_bayesian_pinn_ode()` will exit with an error.
 
 ## Example
 
@@ -463,22 +328,29 @@ Incase you are only solving the Equations for solution, do not provide dataset
 
 ## Keyword Arguments
 
-* `strategy`: The training strategy used to choose the points for the evaluations. By default GridTraining is used with given physdt discretization.
-* `init_params`: initial parameter values for BPINN (ideally for multiple chains different initializations preferred)
+* `strategy`: The training strategy used to choose the points for the evaluations. By
+  default GridTraining is used with given physdt discretization.
+* `init_params`: initial parameter values for BPINN (ideally for multiple chains different
+  initializations preferred)
 * `nchains`: number of chains you want to sample
-* `draw_samples`: number of samples to be drawn in the MCMC algorithms (warmup samples are ~2/3 of draw samples)
+* `draw_samples`: number of samples to be drawn in the MCMC algorithms (warmup samples are
+  ~2/3 of draw samples)
 * `l2std`: standard deviation of BPINN prediction against L2 losses/Dataset
 * `phystd`: standard deviation of BPINN prediction against Chosen Underlying ODE System
 * `phynewstd`: standard deviation of new loss func term
-* `priorsNNw`: Tuple of (mean, std) for BPINN Network parameters. Weights and Biases of BPINN are Normal Distributions by default.
+* `priorsNNw`: Tuple of (mean, std) for BPINN Network parameters. Weights and Biases of
+  BPINN are Normal Distributions by default.
 * `param`: Vector of chosen ODE parameters Distributions in case of Inverse problems.
 * `autodiff`: Boolean Value for choice of Derivative Backend(default is numerical)
 * `physdt`: Timestep for approximating ODE in it's Time domain. (1/20.0 by default)
 * `Kernel`: Choice of MCMC Sampling Algorithm (AdvancedHMC.jl implementations HMC/NUTS/HMCDA)
-* `Integratorkwargs`: `Integrator`, `jitter_rate`, `tempering_rate`. Refer: https://turinglang.org/AdvancedHMC.jl/stable/
-* `Adaptorkwargs`: `Adaptor`, `Metric`, `targetacceptancerate`. Refer: https://turinglang.org/AdvancedHMC.jl/stable/
-    Note: Target percentage(in decimal) of iterations in which the proposals are accepted (0.8 by default)
-* `MCMCargs`: A NamedTuple containing all the chosen MCMC kernel's(HMC/NUTS/HMCDA) Arguments, as follows :
+* `Integratorkwargs`: `Integrator`, `jitter_rate`, `tempering_rate`.
+  Refer: https://turinglang.org/AdvancedHMC.jl/stable/
+* `Adaptorkwargs`: `Adaptor`, `Metric`, `targetacceptancerate`.
+  Refer: https://turinglang.org/AdvancedHMC.jl/stable/ Note: Target percentage (in decimal)
+  of iterations in which the proposals are accepted (0.8 by default)
+* `MCMCargs`: A NamedTuple containing all the chosen MCMC kernel's (HMC/NUTS/HMCDA)
+  Arguments, as follows :
     * `n_leapfrog`: number of leapfrog steps for HMC
     * `δ`: target acceptance probability for NUTS and HMCDA
     * `λ`: target trajectory length for HMCDA
@@ -488,67 +360,53 @@ Incase you are only solving the Equations for solution, do not provide dataset
 * `progress`: controls whether to show the progress meter or not.
 * `verbose`: controls the verbosity. (Sample call args in AHMC)
 
-## Warnings
+!!! warning
 
-* AdvancedHMC.jl is still developing convenience structs so might need changes on new releases.
+    AdvancedHMC.jl is still developing convenience structs so might need changes on new
+    releases.
 """
-function ahmc_bayesian_pinn_ode(prob::SciMLBase.ODEProblem, chain;
-        strategy = GridTraining, dataset = [nothing],
-        init_params = nothing, draw_samples = 1000,
-        physdt = 1 / 20.0, l2std = [0.05],
-        phystd = [0.05], phynewstd = [0.05], priorsNNw = (0.0, 2.0),
-        param = [], nchains = 1, autodiff = false,
-        Kernel = HMC,
+function ahmc_bayesian_pinn_ode(
+        prob::SciMLBase.ODEProblem, chain; strategy = GridTraining, dataset = [nothing],
+        init_params = nothing, draw_samples = 1000, physdt = 1 / 20.0, l2std = [0.05],
+        phystd = [0.05], phynewstd = [0.05], priorsNNw = (0.0, 2.0), param = [], nchains = 1,
+        autodiff = false, Kernel = HMC,
         Adaptorkwargs = (Adaptor = StanHMCAdaptor,
             Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
-        Integratorkwargs = (Integrator = Leapfrog,),
-        MCMCkwargs = (n_leapfrog = 30,),
-        progress = false, verbose = false,
-        estim_collocate = false)
-    !(chain isa Lux.AbstractExplicitLayer) &&
-        (chain = adapt(FromFluxAdaptor(false, false), chain))
-    # NN parameter prior mean and variance(PriorsNN must be a tuple)
-    if isinplace(prob)
-        throw(error("The BPINN ODE solver only supports out-of-place ODE definitions, i.e. du=f(u,p,t)."))
-    end
+        Integratorkwargs = (Integrator = Leapfrog,), MCMCkwargs = (n_leapfrog = 30,),
+        progress = false, verbose = false, estim_collocate = false)
+    @assert !isinplace(prob) "The BPINN ODE solver only supports out-of-place ODE definitions, i.e. du=f(u,p,t)."
+
+    chain isa AbstractLuxLayer || (chain = FromFluxAdaptor()(chain))
 
     strategy = strategy == GridTraining ? strategy(physdt) : strategy
 
     if dataset != [nothing] &&
        (length(dataset) < 2 || !(dataset isa Vector{<:Vector{<:AbstractFloat}}))
-        throw(error("Invalid dataset. dataset would be timeseries (x̂,t) where type: Vector{Vector{AbstractFloat}"))
+        error("Invalid dataset. dataset would be timeseries (x̂,t) where type: Vector{Vector{AbstractFloat}")
     end
 
     if dataset != [nothing] && param == []
         println("Dataset is only needed for Parameter Estimation + Forward Problem, not in only Forward Problem case.")
     elseif dataset == [nothing] && param != []
-        throw(error("Dataset Required for Parameter Estimation."))
+        error("Dataset Required for Parameter Estimation.")
     end
 
-    if chain isa Lux.AbstractExplicitLayer
-        # Lux-Named Tuple
-        initial_nnθ, recon, st = generate_Tar(chain, init_params)
-    else
-        error("Only Lux.AbstractExplicitLayer Neural networks are supported")
-    end
+    initial_nnθ, chain, st = generate_ltd(chain, init_params)
 
-    if nchains > Threads.nthreads()
-        throw(error("number of chains is greater than available threads"))
-    elseif nchains < 1
-        throw(error("number of chains must be greater than 1"))
-    end
+    @assert nchains≤Threads.nthreads() "number of chains is greater than available threads"
+    @assert nchains≥1 "number of chains must be greater than 1"
 
     # eltype(physdt) cause needs Float64 for find_good_stepsize
     # Lux chain(using component array later as vector_to_parameter need namedtuple)
-    initial_θ = collect(eltype(physdt),
-        vcat(ComponentArrays.ComponentArray(initial_nnθ)))
+    T = eltype(physdt)
+    initial_θ = getdata(ComponentArray{T}(initial_nnθ))
 
     # adding ode parameter estimation
     nparameters = length(initial_θ)
     ninv = length(param)
     priors = [
-        MvNormal(priorsNNw[1] * ones(nparameters),
-        LinearAlgebra.Diagonal(abs2.(priorsNNw[2] .* ones(nparameters))))
+        MvNormal(T(priorsNNw[1]) * ones(T, nparameters),
+        Diagonal(abs2.(T(priorsNNw[2]) .* ones(T, nparameters))))
     ]
 
     # append Ode params to all paramvector
@@ -560,30 +418,25 @@ function ahmc_bayesian_pinn_ode(prob::SciMLBase.ODEProblem, chain;
     end
 
     t0 = prob.tspan[1]
+    smodel = StatefulLuxLayer{true}(chain, nothing, st)
     # dimensions would be total no of params,initial_nnθ for Lux namedTuples
-    ℓπ = LogTargetDensity(nparameters, prob, recon, st, strategy, dataset, priors,
+    ℓπ = LogTargetDensity(nparameters, prob, smodel, strategy, dataset, priors,
         phystd, phynewstd, l2std, autodiff, physdt, ninv, initial_nnθ, estim_collocate)
 
-    try
-        ℓπ(t0, initial_θ[1:(nparameters - ninv)])
-    catch err
-        if isa(err, DimensionMismatch)
-            throw(DimensionMismatch("Dimensions of the initial u0 and chain should match"))
-        else
-            throw(err)
+    if verbose
+        @printf("Current Physics Log-likelihood: %g\n", physloglikelihood(ℓπ, initial_θ))
+        @printf("Current Prior Log-likelihood: %g\n", priorweights(ℓπ, initial_θ))
+        @printf("Current MSE against dataset Log-likelihood: %g\n",
+            L2LossData(ℓπ, initial_θ))
+        if estim_collocate
+            @printf("Current gradient loss against dataset Log-likelihood: %g\n",
+                L2loss2(ℓπ, initial_θ))
         end
     end
 
-    @info("Current Physics Log-likelihood : ", physloglikelihood(ℓπ, initial_θ))
-    @info("Current Prior Log-likelihood : ", priorweights(ℓπ, initial_θ))
-    @info("Current MSE against dataset Log-likelihood : ", L2LossData(ℓπ, initial_θ))
-    if estim_collocate
-        @info("Current gradient loss against dataset Log-likelihood : ",
-            L2loss2(ℓπ, initial_θ))
-    end
-
-    Adaptor, Metric, targetacceptancerate = Adaptorkwargs[:Adaptor],
-    Adaptorkwargs[:Metric], Adaptorkwargs[:targetacceptancerate]
+    Adaptor = Adaptorkwargs[:Adaptor]
+    Metric = Adaptorkwargs[:Metric]
+    targetacceptancerate = Adaptorkwargs[:targetacceptancerate]
 
     # Define Hamiltonian system (nparameters ~ dimensionality of the sampling space)
     metric = Metric(nparameters)
@@ -598,8 +451,10 @@ function ahmc_bayesian_pinn_ode(prob::SciMLBase.ODEProblem, chain;
 
         Threads.@threads for i in 1:nchains
             # each chain has different initial NNparameter values(better posterior exploration)
-            initial_θ = vcat(randn(nparameters - ninv),
-                initial_θ[(nparameters - ninv + 1):end])
+            initial_θ = vcat(
+                randn(eltype(initial_θ), nparameters - ninv),
+                initial_θ[(nparameters - ninv + 1):end]
+            )
             initial_ϵ = find_good_stepsize(hamiltonian, initial_θ)
             integrator = integratorchoice(Integratorkwargs, initial_ϵ)
             adaptor = adaptorchoice(Adaptor, MassMatrixAdaptor(metric),
@@ -612,7 +467,7 @@ function ahmc_bayesian_pinn_ode(prob::SciMLBase.ODEProblem, chain;
 
             samplesc[i] = samples
             statsc[i] = stats
-            mcmc_chain = Chains(hcat(samples...)')
+            mcmc_chain = Chains(reduce(hcat, samples)')
             chains[i] = mcmc_chain
         end
 
@@ -628,13 +483,17 @@ function ahmc_bayesian_pinn_ode(prob::SciMLBase.ODEProblem, chain;
         samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples,
             adaptor; progress = progress, verbose = verbose)
 
-        @info("Sampling Complete.")
-        @info("Final Physics Log-likelihood : ", physloglikelihood(ℓπ, samples[end]))
-        @info("Final Prior Log-likelihood : ", priorweights(ℓπ, samples[end]))
-        @info("Final MSE against dataset Log-likelihood : ", L2LossData(ℓπ, samples[end]))
-        if estim_collocate
-            @info("Final gradient loss against dataset Log-likelihood : ",
-                L2loss2(ℓπ, samples[end]))
+        if verbose
+            println("Sampling Complete.")
+            @printf("Final Physics Log-likelihood: %g\n",
+                physloglikelihood(ℓπ, samples[end]))
+            @printf("Final Prior Log-likelihood: %g\n", priorweights(ℓπ, samples[end]))
+            @printf("Final MSE against dataset Log-likelihood: %g\n",
+                L2LossData(ℓπ, samples[end]))
+            if estim_collocate
+                @printf("Final gradient loss against dataset Log-likelihood: %g\n",
+                    L2loss2(ℓπ, samples[end]))
+            end
         end
 
         # return a chain(basic chain),samples and stats
diff --git a/src/discretize.jl b/src/discretize.jl
index 757c1f8b8f..5187a0638a 100644
--- a/src/discretize.jl
+++ b/src/discretize.jl
@@ -445,14 +445,13 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem, discretization::Ab
     # assume one single additional loss function if there is one. this means that the user needs to lump all their functions into a single one,
     num_additional_loss = convert(Int, additional_loss !== nothing)
 
-        adaloss_T = eltype(adaloss.pde_loss_weights)
+    adaloss_T = eltype(adaloss.pde_loss_weights)
 
-        # this will error if the user has provided a number of initial weights that is more than 1 and doesn't match the number of loss functions
-        adaloss.pde_loss_weights = ones(adaloss_T, num_pde_losses) .*
-                                   adaloss.pde_loss_weights
-        adaloss.bc_loss_weights = ones(adaloss_T, num_bc_losses) .* adaloss.bc_loss_weights
-        adaloss.additional_loss_weights = ones(adaloss_T, num_additional_loss) .*
-                                          adaloss.additional_loss_weights
+    # this will error if the user has provided a number of initial weights that is more than 1 and doesn't match the number of loss functions
+    adaloss.pde_loss_weights = ones(adaloss_T, num_pde_losses) .* adaloss.pde_loss_weights
+    adaloss.bc_loss_weights = ones(adaloss_T, num_bc_losses) .* adaloss.bc_loss_weights
+    adaloss.additional_loss_weights = ones(adaloss_T, num_additional_loss) .*
+                                      adaloss.additional_loss_weights
 
     reweight_losses_func = generate_adaptive_loss_function(pinnrep, adaloss,
         pde_loss_functions, bc_loss_functions)
@@ -521,36 +520,10 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem, discretization::Ab
             return full_weighted_loss
         end
 
-        return bc_loss_functions, pde_loss_functions, full_loss_function
+        return full_loss_function
     end
 
     function get_likelihood_estimate_function(discretization::BayesianPINN)
-        # Because separate reweighting code section needed and loglikelihood is pointwise independent
-        pde_loss_functions, bc_loss_functions = merge_strategy_with_loglikelihood_function(
-            pinnrep,
-            strategy,
-            datafree_pde_loss_functions,
-            datafree_bc_loss_functions)
-
-        # setup for all adaptive losses
-        num_pde_losses = length(pde_loss_functions)
-        num_bc_losses = length(bc_loss_functions)
-        # assume one single additional loss function if there is one. this means that the user needs to lump all their functions into a single one,
-        num_additional_loss = additional_loss isa Nothing ? 0 : 1
-
-        adaloss_T = eltype(adaloss.pde_loss_weights)
-
-        # this will error if the user has provided a number of initial weights that is more than 1 and doesn't match the number of loss functions
-        adaloss.pde_loss_weights = ones(adaloss_T, num_pde_losses) .*
-                                   adaloss.pde_loss_weights
-        adaloss.bc_loss_weights = ones(adaloss_T, num_bc_losses) .* adaloss.bc_loss_weights
-        adaloss.additional_loss_weights = ones(adaloss_T, num_additional_loss) .*
-                                          adaloss.additional_loss_weights
-
-        reweight_losses_func = generate_adaptive_loss_function(pinnrep, adaloss,
-            pde_loss_functions,
-            bc_loss_functions)
-
         dataset_pde, dataset_bc = discretization.dataset
 
         # required as Physics loss also needed on the discrete dataset domain points
@@ -566,7 +539,7 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem, discretization::Ab
         end
 
         function full_loss_function(θ, allstd::Vector{Vector{Float64}})
-            stdpdes, stdbcs, stdextra, stdpdesnew = allstd
+            stdpdes, stdbcs, stdextra = allstd
             # the aggregation happens on cpu even if the losses are gpu, probably fine since it's only a few of them
             pde_loglikelihoods = [logpdf(Normal(0, stdpdes[i]), pde_loss_function(θ))
                                   for (i, pde_loss_function) in enumerate(pde_loss_functions)]
@@ -578,6 +551,7 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem, discretization::Ab
                 pde_loglikelihoods += [logpdf(Normal(0, stdpdes[j]), pde_loss_function(θ))
                                        for (j, pde_loss_function) in enumerate(datapde_loss_functions)]
             end
+
             if !(databc_loss_functions isa Nothing)
                 bc_loglikelihoods += [logpdf(Normal(0, stdbcs[j]), bc_loss_function(θ))
                                       for (j, bc_loss_function) in enumerate(databc_loss_functions)]
@@ -618,11 +592,10 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem, discretization::Ab
             return full_weighted_loglikelihood
         end
 
-        return bc_loss_functions, pde_loss_functions, full_loss_function
+        return full_loss_function
     end
 
-    bc_loss_functions, pde_loss_functions, full_loss_function = get_likelihood_estimate_function(discretization)
-
+    full_loss_function = get_likelihood_estimate_function(discretization)
     pinnrep.loss_functions = PINNLossFunctions(bc_loss_functions, pde_loss_functions,
         full_loss_function, additional_loss, datafree_pde_loss_functions,
         datafree_bc_loss_functions)
@@ -641,4 +614,4 @@ function SciMLBase.discretize(pde_system::PDESystem, discretization::PhysicsInfo
     pinnrep = symbolic_discretize(pde_system, discretization)
     f = OptimizationFunction(pinnrep.loss_functions.full_loss_function, AutoZygote())
     return Optimization.OptimizationProblem(f, pinnrep.flat_init_params)
-end
+end
\ No newline at end of file

From a3c19b723bddff4d829e66b98922f4a402a27926 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal <astitvaaggarwalimportant@gmail.com>
Date: Sat, 19 Oct 2024 07:00:55 +0530
Subject: [PATCH 107/107] update project.toml

---
 Project.toml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 2544e1a82e..d0ddd63ccf 100644
--- a/Project.toml
+++ b/Project.toml
@@ -114,7 +114,6 @@ Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 DiffEqNoiseProcess = "77a26b50-5914-5dd7-bc55-306e6241c503"
 ExplicitImports = "7d51a73a-1435-4ff3-83d9-f097790105c7"
-DiffEqNoiseProcess = "77a26b50-5914-5dd7-bc55-306e6241c503"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 Hwloc = "0e44f5e4-bd66-52a0-8798-143a42290a1d"
 InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"