From 23c2a3dd0b9c644215dc6dbd1303d022171fc4ec Mon Sep 17 00:00:00 2001
From: Vaibhav Dixit <vaibhavyashdixit@gmail.com>
Date: Fri, 27 Sep 2024 22:52:56 -0400
Subject: [PATCH 1/7] Update docs: remove extra returns from loss and extra args
 from callback

---
 docs/src/getting_started/fit_simulation.md | 29 +++++++++-------------
 docs/src/showcase/blackhole.md             |  9 +++----
 docs/src/showcase/missing_physics.md       |  2 +-
 docs/src/showcase/pinngpu.md               |  2 +-
 4 files changed, 18 insertions(+), 24 deletions(-)

diff --git a/docs/src/getting_started/fit_simulation.md b/docs/src/getting_started/fit_simulation.md
index d5631c28f6..cfef3f3bf7 100644
--- a/docs/src/getting_started/fit_simulation.md
+++ b/docs/src/getting_started/fit_simulation.md
@@ -99,12 +99,14 @@ function loss(newp)
     newprob = remake(prob, p = newp)
     sol = solve(newprob, saveat = 1)
     loss = sum(abs2, sol .- xy_data)
-    return loss, sol
+    return loss
 end
 
 # Define a callback function to monitor optimization progress
-function callback(p, l, sol)
+function callback(state, l)
     display(l)
+    newprob = remake(prob, p = state.u)
+    sol = solve(newprob, saveat = 1)
     plt = plot(sol, ylim = (0, 6), label = ["Current x Prediction" "Current y Prediction"])
     scatter!(plt, t_data, xy_data', label = ["x Data" "y Data"])
     display(plt)
@@ -278,21 +280,13 @@ function loss(newp)
     newprob = remake(prob, p = newp)
     sol = solve(newprob, saveat = 1)
     l = sum(abs2, sol .- xy_data)
-    return l, sol
+    return l
 end
 ```
 
-Notice that our loss function returns the loss value as the first return,
-but returns extra information (the ODE solution with the new parameters)
-as an extra return argument.
-We will explain why this extra return information is helpful in the next section.
-
 ### Step 5: Solve the Optimization Problem
 
 This step will look very similar to [the first optimization tutorial](@ref first_opt),
-except now we have a new loss function `loss` which returns both the loss value
-and the associated ODE solution.
-(In the previous tutorial, `L` only returned the loss value.)
 The `Optimization.solve` function can accept an optional callback function
 to monitor the optimization process using extra arguments returned from `loss`.
 
@@ -300,15 +294,14 @@ The callback syntax is always:
 
 ```
 callback(
-    optimization variables,
+    state,
     the current loss value,
-    other arguments returned from the loss function, ...
 )
 ```
 
-In this case, we will provide the callback the arguments `(p, l, sol)`,
-since it always takes the current state of the optimization first (`p`)
-then the returns from the loss function (`l, sol`).
+In this case, we will give the callback the arguments `(state, l)`, since it always
+receives the current state of the optimization first (`state`, whose `state.u` field
+holds the current parameter values), followed by the current loss value (`l`).
 The return value of the callback function should default to `false`.
 `Optimization.solve` will halt if/when the callback function returns `true` instead.
 Typically the `return` statement would monitor the loss value
@@ -318,8 +311,10 @@ More details about callbacks in Optimization.jl can be found
 [here](https://docs.sciml.ai/Optimization/stable/API/solve/).
 
 ```@example odefit
-function callback(p, l, sol)
+function callback(p, l)
     display(l)
+    newprob = remake(prob, p = p)
+    sol = solve(newprob, saveat = 1)
     plt = plot(sol, ylim = (0, 6), label = ["Current x Prediction" "Current y Prediction"])
     scatter!(plt, t_data, xy_data', label = ["x Data" "y Data"])
     display(plt)
diff --git a/docs/src/showcase/blackhole.md b/docs/src/showcase/blackhole.md
index d3b90dc892..c993753558 100644
--- a/docs/src/showcase/blackhole.md
+++ b/docs/src/showcase/blackhole.md
@@ -490,10 +490,8 @@ function loss(NN_params)
         prob_nn, RK4(), u0 = u0, p = NN_params, saveat = tsteps, dt = dt, adaptive = false))
     pred_waveform = compute_waveform(dt_data, pred, mass_ratio, model_params)[1]
 
-    loss = (sum(abs2,
-        view(waveform, obs_to_use_for_training) .-
-        view(pred_waveform, obs_to_use_for_training)))
-    return loss, pred_waveform
+    loss = ( sum(abs2, view(waveform,obs_to_use_for_training) .- view(pred_waveform,obs_to_use_for_training) ) )
+    return loss
 end
 ```
 
@@ -508,10 +506,11 @@ We'll use the following callback to save the history of the loss values.
 ```@example ude
 losses = []
 
-callback(θ, l, pred_waveform; doplot = true) = begin
+callback(state, l; doplot = true) = begin
     push!(losses, l)
     #=  Disable plotting as it trains since in docs
     display(l)
+    waveform = compute_waveform(dt_data, soln, mass_ratio, model_params)[1]
     # plot current prediction against data
     plt = plot(tsteps, waveform,
         markershape=:circle, markeralpha = 0.25,
diff --git a/docs/src/showcase/missing_physics.md b/docs/src/showcase/missing_physics.md
index f10ff8b94c..393ad2b70d 100644
--- a/docs/src/showcase/missing_physics.md
+++ b/docs/src/showcase/missing_physics.md
@@ -222,7 +222,7 @@ current loss:
 ```@example ude
 losses = Float64[]
 
-callback = function (p, l)
+callback = function (state, l)
     push!(losses, l)
     if length(losses) % 50 == 0
         println("Current loss after $(length(losses)) iterations: $(losses[end])")
diff --git a/docs/src/showcase/pinngpu.md b/docs/src/showcase/pinngpu.md
index 0875715cbc..29c130e6b2 100644
--- a/docs/src/showcase/pinngpu.md
+++ b/docs/src/showcase/pinngpu.md
@@ -148,7 +148,7 @@ prob = discretize(pde_system, discretization)
 ## Step 6: Solve the Optimization Problem
 
 ```@example pinn
-callback = function (p, l)
+callback = function (state, l)
     println("Current loss is: $l")
     return false
 end

From 1f2cb6ae894c99f98caca23751841782b10cd069 Mon Sep 17 00:00:00 2001
From: Vaibhav Kumar Dixit <vaibhavyashdixit@gmail.com>
Date: Sat, 28 Sep 2024 09:28:09 -0400
Subject: [PATCH 2/7] Apply suggestions from code review

---
 docs/src/getting_started/fit_simulation.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/src/getting_started/fit_simulation.md b/docs/src/getting_started/fit_simulation.md
index cfef3f3bf7..5c845decf2 100644
--- a/docs/src/getting_started/fit_simulation.md
+++ b/docs/src/getting_started/fit_simulation.md
@@ -311,9 +311,9 @@ More details about callbacks in Optimization.jl can be found
 [here](https://docs.sciml.ai/Optimization/stable/API/solve/).
 
 ```@example odefit
-function callback(p, l)
+function callback(state, l)
     display(l)
-    newprob = remake(prob, p = p)
+    newprob = remake(prob, p = state.u)
     sol = solve(newprob, saveat = 1)
     plt = plot(sol, ylim = (0, 6), label = ["Current x Prediction" "Current y Prediction"])
     scatter!(plt, t_data, xy_data', label = ["x Data" "y Data"])

From b4f03cd7a21a5c2daefda724c724e07ed85ed4f4 Mon Sep 17 00:00:00 2001
From: Vaibhav Dixit <vaibhavyashdixit@gmail.com>
Date: Mon, 28 Oct 2024 09:04:56 -0400
Subject: [PATCH 3/7] bump versions

---
 docs/Project.toml | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/docs/Project.toml b/docs/Project.toml
index 044a380b59..1e279add11 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -78,12 +78,12 @@ ModelingToolkit = "9.9"
 MultiDocumenter = "0.7"
 NeuralPDE = "5.15"
 NonlinearSolve = "3"
-Optimization = "3"
-OptimizationMOI = "0.4"
-OptimizationNLopt = "0.2"
-OptimizationOptimJL = "0.2, 0.3"
-OptimizationOptimisers = "0.2"
-OptimizationPolyalgorithms = "0.2"
+Optimization = "4"
+OptimizationMOI = "0.5"
+OptimizationNLopt = "0.3"
+OptimizationOptimJL = "0.4"
+OptimizationOptimisers = "0.3"
+OptimizationPolyalgorithms = "0.3"
 OrdinaryDiffEq = "6"
 Plots = "1"
 SciMLExpectations = "2"

From d394b79ecc6666fa3fb73c8f718f9e270d838a98 Mon Sep 17 00:00:00 2001
From: Vaibhav Dixit <vaibhavyashdixit@gmail.com>
Date: Mon, 28 Oct 2024 09:47:40 -0400
Subject: [PATCH 4/7] lux compat

---
 docs/Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/Project.toml b/docs/Project.toml
index 1e279add11..50ae5c6c31 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -69,7 +69,7 @@ IncompleteLU = "0.2"
 Integrals = "4"
 LineSearches = "7"
 LinearSolve = "2"
-Lux = "0.5"
+Lux = "1"
 LuxCUDA = "0.3"
 MCMCChains = "6"
 Measurements = "2"

From 9803b162662ae72ebf4b6557f5fb49fbabfeae50 Mon Sep 17 00:00:00 2001
From: Vaibhav Dixit <vaibhavyashdixit@gmail.com>
Date: Mon, 28 Oct 2024 12:01:04 -0400
Subject: [PATCH 5/7] add comment about resolving and plotting in callback

---
 docs/src/getting_started/fit_simulation.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/src/getting_started/fit_simulation.md b/docs/src/getting_started/fit_simulation.md
index 5c845decf2..4287307f7d 100644
--- a/docs/src/getting_started/fit_simulation.md
+++ b/docs/src/getting_started/fit_simulation.md
@@ -322,7 +322,7 @@ function callback(state, l)
 end
 ```
 
-With this callback function, every step of the optimization will display both the loss value and a plot of how the solution compares to the training data.
+With this callback function, every step of the optimization will display both the loss value and a plot of how the solution compares to the training data. Since we want to track the fit visually, we plot the simulation at each iteration and compare it to the data. This is expensive, since it requires an extra `solve` call and a plotting step at every iteration.
 
 Now, just like [the first optimization tutorial](@ref first_opt),
 we set up our `OptimizationFunction` and `OptimizationProblem`,

From 1ea8ed87bd0843e3e85abf11447359f5ce63038b Mon Sep 17 00:00:00 2001
From: Vaibhav Dixit <vaibhavyashdixit@gmail.com>
Date: Mon, 28 Oct 2024 16:46:03 -0400
Subject: [PATCH 6/7] fix links

---
 docs/src/highlevels/modeling_languages.md | 2 +-
 docs/src/showcase/gpu_spde.md             | 2 +-
 docs/src/showcase/symbolic_analysis.md    | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/src/highlevels/modeling_languages.md b/docs/src/highlevels/modeling_languages.md
index 576759575b..e685b6c91a 100644
--- a/docs/src/highlevels/modeling_languages.md
+++ b/docs/src/highlevels/modeling_languages.md
@@ -50,7 +50,7 @@ doing standard molecular dynamics approximations.
 
 ## DiffEqFinancial.jl: Financial models for use in the DifferentialEquations ecosystem
 
-The goal of [DiffEqFinancial.jl](https://github.com/SciML/DiffEqFinancial.jl/commits/master) is to be a feature-complete set
+The goal of [DiffEqFinancial.jl](https://github.com/SciML/DiffEqFinancial.jl/) is to be a feature-complete set
 of solvers for the types of problems found in libraries like QuantLib, such as the Heston process or the
 Black-Scholes model.
 
diff --git a/docs/src/showcase/gpu_spde.md b/docs/src/showcase/gpu_spde.md
index f6d3db65d0..fe43d74b4a 100644
--- a/docs/src/showcase/gpu_spde.md
+++ b/docs/src/showcase/gpu_spde.md
@@ -302,7 +302,7 @@ These last two ways enclose the pointer to our cache arrays locally but still pr
 function f(du,u,p,t) to the ODE solver.
 
 Now, since PDEs are large, many times we don't care about getting the whole timeseries. Using
-the [output controls from DifferentialEquations.jl](https://diffeq.sciml.ai/latest/basics/common_solver_opts.html#Output-Control-1), we can make it only output the final timepoint.
+the [output controls from DifferentialEquations.jl](https://docs.sciml.ai/DiffEqDocs/stable/basics/common_solver_opts/), we can make it only output the final timepoint.
 
 ```julia
 prob = ODEProblem(f, u0, (0.0, 100.0))
diff --git a/docs/src/showcase/symbolic_analysis.md b/docs/src/showcase/symbolic_analysis.md
index b95fff2599..13f996ae0a 100644
--- a/docs/src/showcase/symbolic_analysis.md
+++ b/docs/src/showcase/symbolic_analysis.md
@@ -118,7 +118,7 @@ Did you implement the DAE incorrectly? No. Is the solver broken? No.
 
 It turns out that this is a property of the DAE that we are attempting to solve.
 This kind of DAE is known as an index-3 DAE. For a complete discussion of DAE
-index, see [this article](https://www.scholarpedia.org/article/Differential-algebraic_equations).
+index, see [this article](http://www.scholarpedia.org/article/Differential-algebraic_equations).
 Essentially, the issue here is that we have 4 differential variables (``x``, ``v_x``, ``y``, ``v_y``)
 and one algebraic variable ``T`` (which we can know because there is no `D(T)`
 term in the equations). An index-1 DAE always satisfies that the Jacobian of

From 109fdaca65837ccc758a4d63dde59e0c2bdaf3c5 Mon Sep 17 00:00:00 2001
From: Christopher Rackauckas <accounts@chrisrackauckas.com>
Date: Tue, 29 Oct 2024 09:13:43 -0100
Subject: [PATCH 7/7] Update make.jl

---
 docs/make.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/make.jl b/docs/make.jl
index 04b614b937..de3a9faf26 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -25,7 +25,8 @@ makedocs(sitename = "Overview of Julia's SciML",
         "https://epubs.siam.org/doi/10.1137/0903023",
         "https://bkamins.github.io/julialang/2020/12/24/minilanguage.html",
         "https://arxiv.org/abs/2109.06786",
-        "https://arxiv.org/abs/2001.04385"],
+        "https://arxiv.org/abs/2001.04385",
+        "https://code.visualstudio.com/"],
     format = Documenter.HTML(assets = ["assets/favicon.ico"],
         canonical = "https://docs.sciml.ai/stable/",
         mathengine = mathengine),