Release 3.2.3
Miscellaneous improvements
ojwoodford authored Oct 12, 2023
1 parent ced2471 commit ce42968
Showing 9 changed files with 58 additions and 36 deletions.
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "NLLSsolver"
uuid = "50b5cf9e-bf7a-4e29-8981-4280cbba70cb"
authors = ["Oliver Woodford"]
version = "3.2.2"
version = "3.2.3"

[deps]
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
13 changes: 8 additions & 5 deletions examples/bundleadjustment.jl
@@ -46,13 +46,16 @@ Base.eltype(::BALResidual{T}) where T = T
function NLLSsolver.computeresjac(::StaticInt{3}, residual::BALResidual, im, point)
# Exploit the parameterization to make the jacobian computation more efficient
res, jac = NLLSsolver.computeresjac(static(1), residual, im, point)
return res, hcat(jac, -view(jac, :, NLLSsolver.SR(4, 6)))
return res, hcat(jac, @inbounds(-view(jac, :, NLLSsolver.SR(4, 6))))
end

# Define the type of the problem
BALProblem = NLLSsolver.NLLSProblem{Union{BALImage{Float64}, NLLSsolver.Point3D{Float64}}, BALResidual{Float64}}

# Function to create a NLLSsolver problem from a BAL dataset
function makeBALproblem(data)
function makeBALproblem(data)::BALProblem
# Create the problem
problem = NLLSsolver.NLLSProblem(Union{BALImage{Float64}, NLLSsolver.Point3D{Float64}}, BALResidual{Float64})
problem = BALProblem()

# Add the camera variable blocks
for cam in data.cameras
@@ -73,7 +76,7 @@ function makeBALproblem(data)
return problem
end

function loadBALproblem(name)
function loadBALproblem(name)::BALProblem
# Create the problem
t = @elapsed begin
data = loadbaldataset(name)
@@ -120,7 +123,7 @@ function optimizeBALproblem(problem::NLLSsolver.NLLSProblem)
# Compute the starting AUC
println(" Start AUC: ", computeauc(problem, 2.0))
# Optimize the cost
result = NLLSsolver.optimize!(problem, NLLSsolver.NLLSOptions(iterator=NLLSsolver.levenbergmarquardt, reldcost=1.0e-6, callback=NLLSsolver.printoutcallback))
result = NLLSsolver.optimize!(problem, NLLSsolver.NLLSOptions(iterator=NLLSsolver.levenbergmarquardt, reldcost=1.0e-6), nothing, NLLSsolver.printoutcallback)
# Compute the final AUC
println(" Final AUC: ", computeauc(problem, 2.0))
# Print out the solver summary
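
Note on the optimize! call above: this commit passes the print callback as a positional argument instead of an NLLSOptions field. A minimal sketch of the updated call pattern, assuming NLLSsolver v3.2.3 and a problem already built with makeBALproblem above; the nothing third positional argument is copied verbatim from the diff and its exact meaning is not documented in this commit:

using NLLSsolver
options = NLLSsolver.NLLSOptions(iterator=NLLSsolver.levenbergmarquardt, reldcost=1.0e-6)
result = NLLSsolver.optimize!(problem, options, nothing, NLLSsolver.printoutcallback)
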
6 changes: 3 additions & 3 deletions src/VectorRepo.jl
@@ -1,11 +1,11 @@

struct VectorRepo{T}
data::Dict{DataType, Vector}
function VectorRepo{T}() where T
return new{T}(Dict{DataType, Vector}())
function VectorRepo{T}(args...) where T
return new{T}(Dict{DataType, Vector}(args...))
end
end
VectorRepo() = VectorRepo{Any}()
VectorRepo(args...) = VectorRepo{Any}(args...)

function Base.get(vr::VectorRepo{T}, ::Type{type})::Vector{type} where {T, type}
@assert type<:T "Invalid type"
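
The VectorRepo constructor now forwards its arguments to the underlying Dict, so a repo can be created pre-populated as well as empty. A standalone sketch mirroring the struct above; the pair-initialisation usage is an assumption, not shown elsewhere in this diff:

struct VectorRepo{T}
    data::Dict{DataType, Vector}
    VectorRepo{T}(args...) where T = new{T}(Dict{DataType, Vector}(args...))
end
VectorRepo(args...) = VectorRepo{Any}(args...)

vr_empty = VectorRepo()                                       # unchanged behaviour
vr_init  = VectorRepo(Float64 => Float64[1.0], Int => Int[])  # forwarded to the Dict constructor
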
17 changes: 9 additions & 8 deletions src/linearsystem.jl
@@ -118,14 +118,15 @@ function makesymmvls(problem, unfixed, nblocks)
end

# Decide whether to have a sparse or a dense system
if sum(blocksizes) > 1000
len = sum(blocksizes)
if len >= 40
# Compute the block sparsity
sparsity = getvarcostmap(problem)
sparsity = sparsity[unfixed,:]
sparsity = triu(sparse(sparsity * sparsity' .> 0))

# Check sparsity level
if nnz(sparsity) * 4 < length(sparsity)
if sparse_dense_decision(len, block_sparse_nnz(sparsity, blocksizes))
# Construct the BSM
bsm = BlockSparseMatrix{Float64}(sparsity, blocksizes, blocksizes)

@@ -140,8 +141,8 @@ end

function updatesymlinearsystem!(linsystem::UniVariateLS, g, H, unusedargs...)
# Update the blocks in the problem
@fastmath linsystem.b .+= g
@fastmath linsystem.A .+= H
linsystem.b .+= g
linsystem.A .+= H
end

function updatesymA!(A, a, vars, varflags, blockindices)
@@ -152,7 +153,7 @@ function updatesymA!(A, a, vars, varflags, blockindices)
nvi = nvars(vars[i])
rangei = SR(1, nvi) .+ loffseti
loffseti += nvi
@fastmath @inbounds block(A, blockindices[i], blockindices[i], nvi, nvi) .+= view(a, rangei, rangei)
@inbounds block(A, blockindices[i], blockindices[i], nvi, nvi) .+= view(a, rangei, rangei)

loffsetj = static(0)
@unroll for j in 1:i-1
@@ -161,9 +162,9 @@
rangej = SR(1, nvj) .+ loffsetj
loffsetj += nvj
if @inbounds blockindices[i] >= blockindices[j] # Make sure the block matrix is lower triangular
@fastmath @inbounds block(A, blockindices[i], blockindices[j], nvi, nvj) .+= view(a, rangei, rangej)
@inbounds block(A, blockindices[i], blockindices[j], nvi, nvj) .+= view(a, rangei, rangej)
else
@fastmath @inbounds block(A, blockindices[j], blockindices[i], nvj, nvi) .+= view(a, rangej, rangei)
@inbounds block(A, blockindices[j], blockindices[i], nvj, nvi) .+= view(a, rangej, rangei)
end
end
end
@@ -178,7 +179,7 @@ function updateb!(B, b, vars, varflags, boffsets, blockindices)
if i <= length(vars) && bitiset(varflags, i)
nv = nvars(vars[i])
range = SR(0, nv-1)
@fastmath @inbounds view(B, range .+ boffsets[blockindices[i]]) .+= view(b, range .+ loffset)
@inbounds view(B, range .+ boffsets[blockindices[i]]) .+= view(b, range .+ loffset)
loffset += nv
end
end
5 changes: 3 additions & 2 deletions src/marginalize.jl
@@ -116,15 +116,16 @@ function constructcrop(from::MultiVariateLSsparse, fromblock, forcesparse=false)
toblocksizes = view(from.A.rowblocksizes, 1:fromblock-1)

# Decide whether to have a sparse or a dense system
@inbounds if forcesparse || sum(toblocksizes) > 1000
len = sum(toblocksizes)
@inbounds if forcesparse || len >= 40
# Compute the crop sparsity
cacheindices(from.A)
toblock = fromblock - 1
cropsparsity = view(from.A.indices + from.A.indicestransposed, :, 1:toblock) # Do view before sum when this issue is fixed: https://github.com/JuliaSparse/SparseArrays.jl/issues/441
cropsparsity = triu(cropsparsity' * cropsparsity)

# Check sparsity level
if forcesparse || nnz(cropsparsity) * 4 < length(cropsparsity)
if forcesparse || sparse_dense_decision(len, block_sparse_nnz(cropsparsity, toblocksizes))
# Add any missing blocks to the cropped region
start = from.A.indicestransposed.nzval[from.A.indicestransposed.colptr[fromblock]]
blocksizes = convert.(Int, @inbounds view(from.A.rowblocksizes, 1:toblock))
2 changes: 1 addition & 1 deletion src/optimize.jl
@@ -129,7 +129,7 @@ function optimizeinternal!(problem::NLLSProblem, options::NLLSOptions, data, ite
end
data.gradientcomputations += 1
end
if data.bestcost < cost
if !(data.bestcost >= cost)
# Update the problem variables to the best ones found
updatefrombest!(problem, data)
end
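
The rewritten test above changes behaviour only when one of the costs is NaN (in practice a NaN final cost), in which case the best variables found are now restored. A tiny illustration of why, using plain Julia comparison semantics:

bestcost, cost = 1.0, NaN
bestcost < cost        # false: the old test would skip updatefrombest!
!(bestcost >= cost)    # true:  the new test restores the best variables found
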
14 changes: 7 additions & 7 deletions src/residual.jl
@@ -21,7 +21,7 @@ function computerescostgradhess(varflags, residual, kernel, vars)
res = computeresidual(residual, vars...)
cost, dc, d2c = robustifydkernel(kernel, sqnorm(res))
kernelvarind = SR(1, nvars(kernel))
return 0.5 * Float64(cost), dc[kernelvarind], d2c[kernelvarind, kernelvarind]
return @inbounds (0.5 * Float64(cost), dc[kernelvarind], d2c[kernelvarind, kernelvarind])
end

# Compute the residual and Jacobian
@@ -30,23 +30,23 @@ function computerescostgradhess(varflags, residual, kernel, vars)
# Compute the unrobust cost, gradient and Hessian
cost = sqnorm(res)
g = jac' * res
H = jac' * jac

if varflags & 1 == 0
# The kernel is not optimized. Differentiate w.r.t. the cost only
cost, dc, d2c = robustifydcost(kernel, cost)
else
# Differentiate w.r.t. the cost and kernel parameters
cost, dc_, d2c_ = robustifydkernel(kernel, cost)
dc = dc_[end]
d2c = d2c_[end,end]
@inbounds dc = dc_[end]
@inbounds d2c = d2c_[end,end]

# Compute the d^2/dkernel.dvariables block
kernelvarind = SR(1, nvars(kernel))
@fastmath dkdv = g * view(d2c_, kernelvarind, nvars(kernel)+1)'
@inbounds dkdv = g * view(d2c_, kernelvarind, nvars(kernel)+1)'
end

# IRLS reweighting of Hessian
H = jac' * jac
if dc != 1
H *= dc
end
@@ -61,8 +61,8 @@

if varflags & 1 == 1
# Add on the kernel derivative blocks
g = vcat(view(dc_, kernelvarind), g)
H = hcat(vcat(view(d2c_, kernelvarind, kernelvarind), dkdv), vcat(dkdv', H))
@inbounds g = vcat(view(dc_, kernelvarind), g)
@inbounds H = hcat(vcat(view(d2c_, kernelvarind, kernelvarind), dkdv), vcat(dkdv', H))
end

# Return the cost and derivatives
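
Forming the Gauss-Newton Hessian is deferred until just before the IRLS reweighting, since J'J is not needed in the kernel branches above. A minimal standalone sketch of that step with toy values; dc stands in for the robust kernel's first derivative, which robustifydcost/robustifydkernel would normally supply, and the second-order kernel term is not reproduced here:

res = [1.0, 2.0]
jac = [1.0 0.0; 0.0 1.0]
dc  = 0.5                 # robust kernel first derivative at sqnorm(res)
g = jac' * res            # unrobust gradient; the robustified gradient is dc * g
H = jac' * jac            # Gauss-Newton Hessian, formed just before reweighting
if dc != 1
    H *= dc               # IRLS reweighting: H is approximately dc * J'J
end
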
14 changes: 14 additions & 0 deletions src/utils.jl
@@ -104,3 +104,17 @@ function fast_bAb(A::SparseMatrixCSC, b::Vector)
end
return total
end

sparse_dense_decision(d, nnz) = (nnz * 32) < (25 * d * (d - 40)) # Threshold nnz = 25/32 * (d^2 - 40d)

function block_sparse_nnz(sparsity, blocksizes)
# Compute the number of non-zeros in a block sparse matrix
nnz = 0
for col in 1:size(sparsity, 2)
@inbounds bc = blocksizes[col]
for r in nzrange(sparsity, col)
nnz += bc * @inbounds blocksizes[rowvals(sparsity)[r]]
end
end
return nnz
end
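
sparse_dense_decision compares the scalar non-zero count of the block pattern against 25/32 * d * (d - 40), where d is the total state dimension, so systems with d <= 40 never go sparse. A worked illustration with arbitrary sizes, assuming the two helpers above are in scope:

using SparseArrays
blocksizes = fill(6, 20)                           # 20 blocks of 6 parameters, d = 120
pattern = spdiagm(0 => trues(20), 1 => trues(19))  # upper-triangular banded block sparsity
d = sum(blocksizes)                                # 120
scalarnnz = block_sparse_nnz(pattern, blocksizes)  # (20 + 19) * 36 = 1404
sparse_dense_decision(d, scalarnnz)                # 1404*32 = 44928 < 25*120*80 = 240000, so sparse
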
21 changes: 12 additions & 9 deletions test/optimizeba.jl
@@ -28,12 +28,15 @@ function create_ba_problem(ncameras, nlandmarks, propvisible)
end

# Generate the measurements
visibility = sprand(nlandmarks, ncameras, propvisible)
visibility = abs.(repeat(vec(1:ncameras), outer=(1, nlandmarks)) .- LinRange(2, ncameras-1, nlandmarks)')
visibility = visibility .<= sort(vec(visibility))[Int(ceil(length(visibility)*propvisible))]
for camind = 1:ncameras
camera = problem.variables[camind]::EffPose3D{Float64}
for landmark = view(visibility.rowval, visibility.colptr[camind]:visibility.colptr[camind+1]-1)
landmarkind = landmark + ncameras
addcost!(problem, ProjError(project(camera * problem.variables[landmarkind]::Point3D{Float64}), camind, landmarkind))
for (landmark, tf) in enumerate(view(visibility, camind, :)')
if tf
landmarkind = landmark + ncameras
addcost!(problem, ProjError(project(camera * problem.variables[landmarkind]::Point3D{Float64}), camind, landmarkind))
end
end
end

@@ -54,8 +57,8 @@

@testset "optimizeba.jl" begin
# Generate some test data for a dense problem
Random.seed!(5)
problem = create_ba_problem(10, 100, 0.3)
Random.seed!(1)
problem = create_ba_problem(3, 5, 1.0)

# Test reordering the costs
problem = perturb_ba_problem(problem, 0.003, 0.0)
@@ -73,8 +76,8 @@
@test result.bestcost < 1.e-15

# Generate & optimize a sparse problem
problem = create_ba_problem(10, 400, 0.3)
problem = perturb_ba_problem(problem, 1.e-7, 1.e-7)
problem = create_ba_problem(10, 50, 0.3)
problem = perturb_ba_problem(problem, 0.001, 0.001)
result = optimize!(problem)
@test result.bestcost < result.startcost * 1e-10
@test result.bestcost < 1.e-15
end
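
In create_ba_problem the random sprand visibility mask is replaced by a deterministic banded pattern: each landmark gets a centre along the camera index range, the cameras closest to that centre see it, and the threshold is chosen so that at least ceil(propvisible * ncameras * nlandmarks) entries are visible. A standalone illustration of that expression with small, arbitrary sizes:

ncameras, nlandmarks, propvisible = 4, 6, 0.5
visibility = abs.(repeat(vec(1:ncameras), outer=(1, nlandmarks)) .- LinRange(2, ncameras-1, nlandmarks)')
visibility = visibility .<= sort(vec(visibility))[Int(ceil(length(visibility)*propvisible))]
count(visibility)    # at least ceil(0.5 * 24) = 12 camera-landmark pairs are visible (ties can add more)
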

1 comment on commit ce42968

@JuliaRegistrator

Registration pull request created: JuliaRegistries/General/93274

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the GitHub interface, or via:

git tag -a v3.2.3 -m "<description of version>" ce4296821f477183c1689759e11170c0530d49d7
git push origin v3.2.3
