From 9a9773e6f103abc6370fa6c848e805b43c60e62b Mon Sep 17 00:00:00 2001 From: keineahnung2345 Date: Tue, 3 Oct 2023 00:12:54 +0800 Subject: [PATCH 1/9] make directories of multiple layers (#3893) --- scripts/gen_anchors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/gen_anchors.py b/scripts/gen_anchors.py index 709cb88adab..a1a23438e95 100644 --- a/scripts/gen_anchors.py +++ b/scripts/gen_anchors.py @@ -117,7 +117,7 @@ def main(argv): args = parser.parse_args() if not os.path.exists(args.output_dir): - os.mkdir(args.output_dir) + os.makedirs(args.output_dir) f = open(args.filelist) From 27b37bf1033c1524121850f3705044f50c0c1b8c Mon Sep 17 00:00:00 2001 From: Stefano Sinigardi Date: Mon, 30 Oct 2023 17:26:03 +0100 Subject: [PATCH 2/9] add missing dependencies for openvino (#8860) * add missing automake * add missing autoconf-archive --- .github/workflows/ccpp.yml | 8 ++++---- .github/workflows/on_pr.yml | 8 ++++---- scripts/setup.sh | 5 +++-- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml index ad386f42c91..af7c5b9a77f 100644 --- a/.github/workflows/ccpp.yml +++ b/.github/workflows/ccpp.yml @@ -87,7 +87,7 @@ jobs: - name: Update apt run: sudo apt update - name: Install dependencies - run: sudo apt-get install -y --no-install-recommends yasm nasm gperf libgles2-mesa-dev libx11-dev libxft-dev libxext-dev libxrandr-dev libxi-dev libxcursor-dev libxdamage-dev libxinerama-dev libdbus-1-dev libxtst-dev + run: sudo apt-get install -y --no-install-recommends yasm nasm gperf automake autoconf libtool pkg-config autoconf-archive libgles2-mesa-dev libx11-dev libxft-dev libxext-dev libxrandr-dev libxi-dev libxcursor-dev libxdamage-dev libxinerama-dev libdbus-1-dev libxtst-dev - name: Clean downloads run: sudo apt-get clean @@ -157,7 +157,7 @@ jobs: - name: Update apt run: sudo apt update - name: Install dependencies - run: sudo apt-get install -y --no-install-recommends yasm nasm gperf libgles2-mesa-dev libx11-dev libxft-dev libxext-dev libxrandr-dev libxi-dev libxcursor-dev libxdamage-dev libxinerama-dev libdbus-1-dev libxtst-dev + run: sudo apt-get install -y --no-install-recommends yasm nasm gperf automake autoconf libtool pkg-config autoconf-archive libgles2-mesa-dev libx11-dev libxft-dev libxext-dev libxrandr-dev libxi-dev libxcursor-dev libxdamage-dev libxinerama-dev libdbus-1-dev libxtst-dev - name: Clean downloads run: sudo apt-get clean @@ -210,7 +210,7 @@ jobs: - name: Update apt run: sudo apt update - name: Install dependencies - run: sudo apt-get install -y --no-install-recommends yasm nasm gperf libgles2-mesa-dev libx11-dev libxft-dev libxext-dev libxrandr-dev libxi-dev libxcursor-dev libxdamage-dev libxinerama-dev libdbus-1-dev libxtst-dev + run: sudo apt-get install -y --no-install-recommends yasm nasm gperf automake autoconf libtool pkg-config autoconf-archive libgles2-mesa-dev libx11-dev libxft-dev libxext-dev libxrandr-dev libxi-dev libxcursor-dev libxdamage-dev libxinerama-dev libdbus-1-dev libxtst-dev - name: Clean downloads run: sudo apt-get clean @@ -398,7 +398,7 @@ jobs: if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.debug_enabled }} - name: Install dependencies - run: brew install libomp yasm nasm pkg-config + run: brew install libomp yasm nasm pkg-config automake autoconf-archive - uses: lukka/get-cmake@latest diff --git a/.github/workflows/on_pr.yml b/.github/workflows/on_pr.yml index 6e7d36b807c..409523fd7e3 100644 --- a/.github/workflows/on_pr.yml +++ 
b/.github/workflows/on_pr.yml @@ -73,7 +73,7 @@ jobs: - name: Update apt run: sudo apt update - name: Install dependencies - run: sudo apt-get install -y --no-install-recommends yasm nasm gperf libgles2-mesa-dev libx11-dev libxft-dev libxext-dev libxrandr-dev libxi-dev libxcursor-dev libxdamage-dev libxinerama-dev libdbus-1-dev libxtst-dev + run: sudo apt-get install -y --no-install-recommends yasm nasm gperf automake autoconf libtool pkg-config autoconf-archive libgles2-mesa-dev libx11-dev libxft-dev libxext-dev libxrandr-dev libxi-dev libxcursor-dev libxdamage-dev libxinerama-dev libdbus-1-dev libxtst-dev - name: Clean downloads run: sudo apt-get clean @@ -116,7 +116,7 @@ jobs: - name: Update apt run: sudo apt update - name: Install dependencies - run: sudo apt-get install -y --no-install-recommends yasm nasm gperf libgles2-mesa-dev libx11-dev libxft-dev libxext-dev libxrandr-dev libxi-dev libxcursor-dev libxdamage-dev libxinerama-dev libdbus-1-dev libxtst-dev + run: sudo apt-get install -y --no-install-recommends yasm nasm gperf automake autoconf libtool pkg-config autoconf-archive libgles2-mesa-dev libx11-dev libxft-dev libxext-dev libxrandr-dev libxi-dev libxcursor-dev libxdamage-dev libxinerama-dev libdbus-1-dev libxtst-dev - name: Clean downloads run: sudo apt-get clean @@ -159,7 +159,7 @@ jobs: - name: Update apt run: sudo apt update - name: Install dependencies - run: sudo apt-get install -y --no-install-recommends yasm nasm gperf libgles2-mesa-dev libx11-dev libxft-dev libxext-dev libxrandr-dev libxi-dev libxcursor-dev libxdamage-dev libxinerama-dev libdbus-1-dev libxtst-dev + run: sudo apt-get install -y --no-install-recommends yasm nasm gperf automake autoconf libtool pkg-config autoconf-archive libgles2-mesa-dev libx11-dev libxft-dev libxext-dev libxrandr-dev libxi-dev libxcursor-dev libxdamage-dev libxinerama-dev libdbus-1-dev libxtst-dev - name: Clean downloads run: sudo apt-get clean @@ -289,7 +289,7 @@ jobs: - uses: actions/checkout@v3 - name: Install dependencies - run: brew install libomp yasm nasm pkg-config + run: brew install libomp yasm nasm pkg-config automake autoconf-archive - uses: lukka/get-cmake@latest diff --git a/scripts/setup.sh b/scripts/setup.sh index e583e972369..6cbc7be8fa1 100755 --- a/scripts/setup.sh +++ b/scripts/setup.sh @@ -75,7 +75,7 @@ elif [[ $(cut -f2 <<< $(lsb_release -i)) == "Ubuntu" ]]; then if [ "$install_tools" = true ] ; then echo "Installing tools" sudo apt-get update - sudo apt-get install -y --no-install-recommends git ninja-build build-essential g++ nasm yasm gperf + sudo apt-get install -y --no-install-recommends git ninja-build build-essential g++ nasm yasm gperf sudo apt-get install -y --no-install-recommends apt-transport-https ca-certificates gnupg software-properties-common wget sudo apt-get install -y --no-install-recommends libgles2-mesa-dev libx11-dev libxft-dev libxext-dev libxrandr-dev libxi-dev libxcursor-dev libxdamage-dev libxinerama-dev libdbus-1-dev libxtst-dev wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | sudo tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null @@ -88,7 +88,8 @@ elif [[ $(cut -f2 <<< $(lsb_release -i)) == "Ubuntu" ]]; then sudo apt-get install -y --no-install-recommends cmake sudo apt-get install -y --no-install-recommends powershell sudo apt-get install -y --no-install-recommends curl zip unzip tar - sudo apt-get install -y --no-install-recommends pkg-config autoconf libtool bison + sudo apt-get install -y --no-install-recommends bison + sudo apt-get 
install -y --no-install-recommends automake autoconf libtool pkg-config autoconf-archive sudo apt-get clean fi else From ffdd5e41a83e87dee978575a3435d3083fe53286 Mon Sep 17 00:00:00 2001 From: Stefano Sinigardi Date: Fri, 3 May 2024 15:35:17 +0200 Subject: [PATCH 3/9] build.ps1 is arm64 compatible (#8900) --- build.ps1 | 115 +++++++++++++++++++++++++++++++++++---------- scripts/utils.psm1 | 112 +++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 198 insertions(+), 29 deletions(-) diff --git a/build.ps1 b/build.ps1 index e378e4b4536..de8916d94a6 100755 --- a/build.ps1 +++ b/build.ps1 @@ -6,7 +6,7 @@ build Created By: Stefano Sinigardi Created Date: February 18, 2019 - Last Modified Date: September 25, 2023 + Last Modified Date: April 29, 2024 .DESCRIPTION Build tool using CMake, trying to properly setup the environment around compiler @@ -92,6 +92,12 @@ Force using a different buildtrees dir for vcpkg .PARAMETER ForceVCPKGPackagesRemoval Force clean up of vcpkg packages folder at the end of the script +.PARAMETER CloneVCPKGShallow +Clone vcpkg as shallow repository + +.PARAMETER ForceDisableVCPKGShallow +Force vcpkg clone to NOT be a shallow one + .PARAMETER ForceSetupVS Forces Visual Studio setup, also on systems on which it would not have been enabled automatically @@ -179,6 +185,8 @@ param ( [switch]$ForceVCPKGBuildtreesRemoval = $false, [string]$ForceVCPKGBuildtreesPath = "", [switch]$ForceVCPKGPackagesRemoval = $false, + [switch]$CloneVCPKGShallow = $false, + [switch]$ForceDisableVCPKGShallow = $false, [switch]$ForceSetupVS = $false, [switch]$ForceCMakeFromVS = $false, [switch]$ForceNinjaFromVS = $false, @@ -193,13 +201,33 @@ param ( $global:DisableInteractive = $DisableInteractive -$build_ps1_version = "3.6.1" +$build_ps1_version = "4.0.1" $script_name = $MyInvocation.MyCommand.Name $utils_psm1_avail = $false -if (Test-Path $PSScriptRoot/scripts/utils.psm1) { +if (Test-Path $PSScriptRoot/utils.psm1) { + Import-Module -Name $PSScriptRoot/utils.psm1 -Force + $utils_psm1_avail = $true +} +elseif (Test-Path $PSScriptRoot/cmake/utils.psm1) { + Import-Module -Name $PSScriptRoot/cmake/utils.psm1 -Force + $utils_psm1_avail = $true + $IsInGitSubmodule = $false +} +elseif (Test-Path $PSScriptRoot/ci/utils.psm1) { + Import-Module -Name $PSScriptRoot/ci/utils.psm1 -Force + $utils_psm1_avail = $true + $IsInGitSubmodule = $false +} +elseif (Test-Path $PSScriptRoot/ccm/utils.psm1) { + Import-Module -Name $PSScriptRoot/ccm/utils.psm1 -Force + $utils_psm1_avail = $true + $IsInGitSubmodule = $false +} +elseif (Test-Path $PSScriptRoot/scripts/utils.psm1) { Import-Module -Name $PSScriptRoot/scripts/utils.psm1 -Force $utils_psm1_avail = $true + $IsInGitSubmodule = $false } else { $utils_psm1_version = "unavail" @@ -306,6 +334,10 @@ if ($InstallDARKNETthroughVCPKG -and -not $EnableOPENCV) { if ($UseVCPKG) { Write-Host "vcpkg bootstrap script: bootstrap-vcpkg${bootstrap_ext}" + if(($ForceOpenCVVersion -eq 0) -and -Not $ForceDisableVCPKGShallow) { + Write-Host "vcpkg will be cloned in shallow mode since baseline is not needed" + $CloneVCPKGShallow = $true + } } if ((-Not $IsWindows) -and (-Not $IsWindowsPowerShell) -and (-Not $ForceSetupVS)) { @@ -323,6 +355,31 @@ if (($IsLinux -or $IsMacOS) -and ($ForceGCCVersion -gt 0)) { $env:CXX = "g++-$ForceGCCVersion" } +$osArchitecture = [System.Runtime.InteropServices.RuntimeInformation]::OSArchitecture +switch ($osArchitecture) { + "X86" { + $vcpkgArchitecture = "x86" + $vsArchitecture = "Win32" + } + "X64" { + $vcpkgArchitecture = "x64" + $vsArchitecture = 
"x64" + } + "Arm" { + $vcpkgArchitecture = "arm" + $vsArchitecture = "arm" + } + "Arm64" { + $vcpkgArchitecture = "arm64" + $vsArchitecture = "arm64" + } + default { + $vcpkgArchitecture = "x64" + $vsArchitecture = "x64" + Write-Output "Unknown architecture. Trying x64" + } +} + $vcpkg_triplet_set_by_this_script = $false $vcpkg_host_triplet_set_by_this_script = $false @@ -336,48 +393,48 @@ if (($IsWindows -or $IsWindowsPowerShell) -and (-Not $env:VCPKG_DEFAULT_TRIPLET) $DoNotUseNinja = $true Write-Host "Warning: when building for 32bit windows target, only msbuild can be used and ninja will be disabled. Doing that for you!" -ForegroundColor Yellow } - $env:VCPKG_DEFAULT_TRIPLET = "x86-windows" + $env:VCPKG_DEFAULT_TRIPLET = "${vcpkgArchitecture}-windows" $vcpkg_triplet_set_by_this_script = $true } else { if($BuildDebug) { - $env:VCPKG_DEFAULT_TRIPLET = "x64-windows" + $env:VCPKG_DEFAULT_TRIPLET = "${vcpkgArchitecture}-windows" $vcpkg_triplet_set_by_this_script = $true } else { - $env:VCPKG_DEFAULT_TRIPLET = "x64-windows-release" + $env:VCPKG_DEFAULT_TRIPLET = "${vcpkgArchitecture}-windows-release" $vcpkg_triplet_set_by_this_script = $true } } } if (($IsWindows -or $IsWindowsPowerShell) -and (-Not $env:VCPKG_DEFAULT_HOST_TRIPLET)) { if ($BuildDebug) { - $env:VCPKG_DEFAULT_HOST_TRIPLET = "x64-windows" + $env:VCPKG_DEFAULT_HOST_TRIPLET = "${vcpkgArchitecture}-windows" $vcpkg_host_triplet_set_by_this_script = $true } else { - $env:VCPKG_DEFAULT_HOST_TRIPLET = "x64-windows-release" + $env:VCPKG_DEFAULT_HOST_TRIPLET = "${vcpkgArchitecture}-windows-release" $vcpkg_host_triplet_set_by_this_script = $true } } if ($IsMacOS -and (-Not $env:VCPKG_DEFAULT_TRIPLET)) { if ($BuildDebug) { - $env:VCPKG_DEFAULT_TRIPLET = "x64-osx" + $env:VCPKG_DEFAULT_TRIPLET = "${vcpkgArchitecture}-osx" $vcpkg_triplet_set_by_this_script = $true } else { - $env:VCPKG_DEFAULT_TRIPLET = "x64-osx-release" + $env:VCPKG_DEFAULT_TRIPLET = "${vcpkgArchitecture}-osx-release" $vcpkg_triplet_set_by_this_script = $true } } if ($IsMacOS -and (-Not $env:VCPKG_DEFAULT_HOST_TRIPLET)) { if ($BuildDebug) { - $env:VCPKG_DEFAULT_HOST_TRIPLET = "x64-osx" + $env:VCPKG_DEFAULT_HOST_TRIPLET = "${vcpkgArchitecture}-osx" $vcpkg_host_triplet_set_by_this_script = $true } else { - $env:VCPKG_DEFAULT_HOST_TRIPLET = "x64-osx-release" + $env:VCPKG_DEFAULT_HOST_TRIPLET = "${vcpkgArchitecture}-osx-release" $vcpkg_host_triplet_set_by_this_script = $true } } @@ -385,22 +442,22 @@ if ($IsMacOS -and (-Not $env:VCPKG_DEFAULT_HOST_TRIPLET)) { if ($IsLinux -and (-Not $env:VCPKG_DEFAULT_TRIPLET)) { if ($true) { if ($BuildDebug) { - $env:VCPKG_DEFAULT_TRIPLET = "x64-linux" + $env:VCPKG_DEFAULT_TRIPLET = "${vcpkgArchitecture}-linux" $vcpkg_triplet_set_by_this_script = $true } else { - $env:VCPKG_DEFAULT_TRIPLET = "x64-linux-release" + $env:VCPKG_DEFAULT_TRIPLET = "${vcpkgArchitecture}-linux-release" $vcpkg_triplet_set_by_this_script = $true } } } if ($IsLinux -and (-Not $env:VCPKG_DEFAULT_HOST_TRIPLET)) { if ($BuildDebug) { - $env:VCPKG_DEFAULT_HOST_TRIPLET = "x64-linux" + $env:VCPKG_DEFAULT_HOST_TRIPLET = "${vcpkgArchitecture}-linux" $vcpkg_host_triplet_set_by_this_script = $true } else { - $env:VCPKG_DEFAULT_HOST_TRIPLET = "x64-linux-release" + $env:VCPKG_DEFAULT_HOST_TRIPLET = "${vcpkgArchitecture}-linux-release" $vcpkg_host_triplet_set_by_this_script = $true } } @@ -630,11 +687,11 @@ if (-Not $DoNotUseNinja) { if (-Not $DoNotSetupVS) { $CL_EXE = Get-Command "cl" -ErrorAction SilentlyContinue | Select-Object -ExpandProperty Definition - if ((-Not $CL_EXE) -or 
($CL_EXE -match "HostX86\\x86") -or ($CL_EXE -match "HostX64\\x86")) { + if (-Not $CL_EXE) { $vsfound = getLatestVisualStudioWithDesktopWorkloadPath Write-Host "Found VS in ${vsfound}" Push-Location "${vsfound}/Common7/Tools" - cmd.exe /c "VsDevCmd.bat -arch=x64 & set" | + cmd.exe /c "VsDevCmd.bat -arch=${vsArchitecture} & set" | ForEach-Object { if ($_ -match "=") { $v = $_.split("="); Set-Item -force -path "ENV:\$($v[0])" -value "$($v[1])" @@ -650,26 +707,26 @@ if (-Not $DoNotSetupVS) { $debugConfig = " --config Debug " $releaseConfig = " --config Release " if ($Use32bitTriplet) { - $targetArchitecture = "`"Win32`"" + $targetArchitecture = "`"${vsArchitecture}`"" } else { - $targetArchitecture = "`"x64`"" + $targetArchitecture = "`"${vsArchitecture}`"" } if ($tokens[0] -eq "14") { $generator = "Visual Studio 14 2015" - $AdditionalBuildSetup = $AdditionalBuildSetup + " -T `"host=x64`" -A $targetArchitecture" + $AdditionalBuildSetup = $AdditionalBuildSetup + " -A $targetArchitecture" } elseif ($tokens[0] -eq "15") { $generator = "Visual Studio 15 2017" - $AdditionalBuildSetup = $AdditionalBuildSetup + " -T `"host=x64`" -A $targetArchitecture" + $AdditionalBuildSetup = $AdditionalBuildSetup + " -A $targetArchitecture" } elseif ($tokens[0] -eq "16") { $generator = "Visual Studio 16 2019" - $AdditionalBuildSetup = $AdditionalBuildSetup + " -T `"host=x64`" -A $targetArchitecture" + $AdditionalBuildSetup = $AdditionalBuildSetup + " -A $targetArchitecture" } elseif ($tokens[0] -eq "17") { $generator = "Visual Studio 17 2022" - $AdditionalBuildSetup = $AdditionalBuildSetup + " -T `"host=x64`" -A $targetArchitecture" + $AdditionalBuildSetup = $AdditionalBuildSetup + " -A $targetArchitecture" } else { MyThrow("Unknown Visual Studio version, unsupported configuration") @@ -739,7 +796,7 @@ if ($UseVCPKG -And -Not $ForceLocalVCPKG) { if (($null -eq $vcpkg_path) -and $UseVCPKG) { if (-Not (Test-Path "$PWD/vcpkg${VCPKGSuffix}")) { $shallow_copy = "" - if(($ForceOpenCVVersion -eq 0)) { + if($CloneVCPKGShallow) { $shallow_copy = " --depth 1 " } $proc = Start-Process -NoNewWindow -PassThru -FilePath $GIT_EXE -ArgumentList "clone $shallow_copy https://github.com/microsoft/vcpkg vcpkg${VCPKGSuffix}" @@ -999,6 +1056,9 @@ else { Remove-Item -Force -Recurse -ErrorAction SilentlyContinue $debug_build_folder } + if (-Not (Test-Path $DebugInstallPrefix)) { + New-Item -Path $DebugInstallPrefix -ItemType directory -Force | Out-Null + } New-Item -Path $debug_build_folder -ItemType directory -Force | Out-Null Set-Location $debug_build_folder $cmake_args = "-G `"$generator`" ${DebugBuildSetup} ${AdditionalBuildSetup} -S .." @@ -1031,6 +1091,9 @@ else { Remove-Item -Force -Recurse -ErrorAction SilentlyContinue $release_build_folder } + if (-Not (Test-Path $ReleaseInstallPrefix)) { + New-Item -Path $ReleaseInstallPrefix -ItemType directory -Force | Out-Null + } New-Item -Path $release_build_folder -ItemType directory -Force | Out-Null Set-Location $release_build_folder $cmake_args = "-G `"$generator`" ${ReleaseBuildSetup} ${AdditionalBuildSetup} -S .." @@ -1056,7 +1119,7 @@ else { if (-Not ($exitCode -eq 0)) { MyThrow("Build failed! Exited with error code $exitCode.") } - if (-Not $UseVCPKG -And -Not $DisableDLLcopy) { + if ($IsWindows -And -Not $UseVCPKG -And -Not $DisableDLLcopy) { $dllfiles = Get-ChildItem ./${dllfolder}/*.dll if ($dllfiles) { Copy-Item $dllfiles .. 
diff --git a/scripts/utils.psm1 b/scripts/utils.psm1 index 928d9339ae8..6e22056957c 100644 --- a/scripts/utils.psm1 +++ b/scripts/utils.psm1 @@ -22,7 +22,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #> -$utils_psm1_version = "0.3.0" +$utils_psm1_version = "1.2.3" $IsWindowsPowerShell = switch ( $PSVersionTable.PSVersion.Major ) { 5 { $true } 4 { $true } @@ -167,6 +167,24 @@ function getLatestVisualStudioWithDesktopWorkloadVersion([bool]$required = $true return $installationVersion } +function setupVisualStudio([bool]$required = $true) { + $CL_EXE = Get-Command "cl" -ErrorAction SilentlyContinue | Select-Object -ExpandProperty Definition + if ((-Not $CL_EXE) -or ($CL_EXE -match "HostX86\\x86") -or ($CL_EXE -match "HostX64\\x86")) { + $vsfound = getLatestVisualStudioWithDesktopWorkloadPath + Write-Host "Found VS in ${vsfound}" + Push-Location "${vsfound}/Common7/Tools" + cmd.exe /c "VsDevCmd.bat -arch=x64 & set" | + ForEach-Object { + if ($_ -match "=") { + $v = $_.split("="); Set-Item -force -path "ENV:\$($v[0])" -value "$($v[1])" + } + } + Pop-Location + $env:PATH = "${vsfound}/VC/Tools/Llvm/x64/bin;$env:PATH" + Write-Host "Visual Studio Command Prompt variables set" + } +} + function DownloadNinja() { Write-Host "Downloading a portable version of Ninja" -ForegroundColor Yellow Remove-Item -Force -Recurse -ErrorAction SilentlyContinue ninja @@ -192,12 +210,12 @@ function DownloadNinja() { function DownloadAria2() { Write-Host "Downloading a portable version of Aria2" -ForegroundColor Yellow if ($IsWindows -or $IsWindowsPowerShell) { - $basename = "aria2-1.35.0-win-32bit-build1" + $basename = "aria2-1.37.0-win-32bit-build1" $zipName = "${basename}.zip" $outFolder = "$basename/$basename" Remove-Item -Force -Recurse -ErrorAction SilentlyContinue $outFolder Remove-Item -Force -ErrorAction SilentlyContinue $zipName - $url = "https://github.com/aria2/aria2/releases/download/release-1.35.0/$zipName" + $url = "https://github.com/aria2/aria2/releases/download/release-1.37.0/$zipName" Invoke-RestMethod -Uri $url -Method Get -ContentType application/zip -OutFile $zipName Expand-Archive -Path $zipName } @@ -228,6 +246,29 @@ function DownloadAria2() { return "./$outFolder/aria2c${ExecutableSuffix}" } +function DownloadLicencpp() { + $licencpp_version = "0.2.1" + Write-Host "Downloading a portable version of licencpp v${licencpp_version}" -ForegroundColor Yellow + if ($IsWindows -or $IsWindowsPowerShell) { + $basename = "licencpp-Windows" + } + elseif ($IsLinux) { + $basename = "licencpp-Linux" + } + else { + MyThrow("Unknown OS, unsupported") + } + $zipName = "${basename}.zip" + $outFolder = "${basename}" + Remove-Item -Force -Recurse -ErrorAction SilentlyContinue $outFolder + Remove-Item -Force -ErrorAction SilentlyContinue $zipName + $url = "https://github.com/cenit/licencpp/releases/download/v${licencpp_version}/$zipName" + Invoke-RestMethod -Uri $url -Method Get -ContentType application/zip -OutFile $zipName + Expand-Archive -Path $zipName + Remove-Item -Force -ErrorAction SilentlyContinue $zipName + return "./$outFolder/licencpp${ExecutableSuffix}" +} + function Download7Zip() { Write-Host "Downloading a portable version of 7-Zip" -ForegroundColor Yellow if ($IsWindows -or $IsWindowsPowerShell) { @@ -324,6 +365,65 @@ Function MyThrow ($Message) { } } +Function CopyTexFile ($MyFile) { + $MyFileName = Split-Path $MyFile -Leaf + New-Item -ItemType Directory -Force -Path "~/${latex_path}" | Out-Null + if (-Not (Test-Path "~/${latex_path}/$MyFileName" )) 
{ + Write-Host "Copying $MyFile to ~/${latex_path}" + Copy-Item "$MyFile" "~/${latex_path}" + } + else { + Write-Host "~/${latex_path}/$MyFileName already present" + } +} + +Function dos2unix { + Param ( + [Parameter(mandatory = $true)] + [string[]]$path + ) + + Get-ChildItem -File -Recurse -Path $path | + ForEach-Object { + Write-Host "Converting $_" + $x = get-content -raw -path $_.fullname; $x -replace "`r`n", "`n" | Set-Content -NoNewline -Force -path $_.fullname + } +} + +Function unix2dos { + Param ( + [Parameter(mandatory = $true)] + [string[]]$path + ) + + Get-ChildItem -File -Recurse -Path $path | + ForEach-Object { + $x = get-content -raw -path $_.fullname + $SearchStr = [regex]::Escape("`r`n") + $SEL = Select-String -InputObject $x -Pattern $SearchStr + if ($null -ne $SEL) { + Write-Host "Converting $_" + # do nothing: avoid creating files containing `r`r`n when using unix2dos twice on the same file + } + else { + Write-Host "Converting $_" + $x -replace "`n", "`r`n" | Set-Content -NoNewline -Force -path $_.fullname + } + } +} + +Function UpdateRepo { + if ($GIT_EXE) { + Get-ChildItem -Directory | + ForEach-Object { + Set-Location $_.Name + git pull + git submodule update --recursive + Set-Location .. + } + } +} + Export-ModuleMember -Variable utils_psm1_version Export-ModuleMember -Variable IsWindowsPowerShell Export-ModuleMember -Variable IsInGitSubmodule @@ -336,7 +436,13 @@ Export-ModuleMember -Variable cuda_version_short_dashed Export-ModuleMember -Function getProgramFiles32bit Export-ModuleMember -Function getLatestVisualStudioWithDesktopWorkloadPath Export-ModuleMember -Function getLatestVisualStudioWithDesktopWorkloadVersion +Export-ModuleMember -Function setupVisualStudio Export-ModuleMember -Function DownloadNinja Export-ModuleMember -Function DownloadAria2 Export-ModuleMember -Function Download7Zip +Export-ModuleMember -Function DownloadLicencpp Export-ModuleMember -Function MyThrow +Export-ModuleMember -Function CopyTexFile +Export-ModuleMember -Function dos2unix +Export-ModuleMember -Function unix2dos +Export-ModuleMember -Function UpdateRepo From 5dc0126e2f1256992fb69d20ece9c04ff06c85a5 Mon Sep 17 00:00:00 2001 From: Stefano Sinigardi Date: Fri, 3 May 2024 15:41:13 +0200 Subject: [PATCH 4/9] Fixes #8897 (#8901) --- Dockerfile.cpu | 6 ++---- Dockerfile.gpu | 8 ++++---- docker-compose.yml | 6 +++--- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/Dockerfile.cpu b/Dockerfile.cpu index 7a15cfb499e..db433f29b25 100644 --- a/Dockerfile.cpu +++ b/Dockerfile.cpu @@ -14,7 +14,7 @@ RUN rm Dockerfile.cpu RUN rm Dockerfile.gpu -RUN rm Docker-compose.yml +RUN rm docker-compose.yml RUN make @@ -28,7 +28,7 @@ RUN apt-get install -y sudo libgomp1 RUN useradd -U -m yolo -RUN usermod -aG sudo yolo +RUN usermod -aG sudo yolo RUN usermod --shell /bin/bash yolo @@ -45,5 +45,3 @@ RUN ldconfig WORKDIR /home/yolo/darknet USER yolo - - diff --git a/Dockerfile.gpu b/Dockerfile.gpu index c4a9effbbcc..f1985fbcfc0 100644 --- a/Dockerfile.gpu +++ b/Dockerfile.gpu @@ -14,11 +14,11 @@ RUN rm Dockerfile.cpu RUN rm Dockerfile.gpu -RUN rm Docker-compose.yml +RUN rm docker-compose.yml RUN make -FROM nvidia/cuda:11.6.0-cudnn8-devel-ubuntu20.04 +FROM nvidia/cuda:11.6.1-cudnn8-devel-ubuntu20.04 ENV DEBIAN_FRONTEND noninteractive @@ -28,7 +28,7 @@ RUN apt-get install -y sudo libgomp1 RUN useradd -U -m yolo -RUN usermod -aG sudo yolo +RUN usermod -aG sudo yolo RUN usermod --shell /bin/bash yolo @@ -44,4 +44,4 @@ RUN ldconfig WORKDIR /home/yolo/darknet -USER yolo \ No newline at end of file 
+USER yolo diff --git a/docker-compose.yml b/docker-compose.yml index 968fbccaaf9..2eebd876997 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,7 +2,7 @@ version: '2' services: yolo-gpu: - build: + build: context: . dockerfile: Dockerfile.gpu image: yolo:gpu @@ -14,7 +14,7 @@ services: count: 1 capabilities: [gpu] yolo-cpu: - build: + build: context: . dockerfile: Dockerfile.cpu - image: yolo:cpu \ No newline at end of file + image: yolo:cpu From 08167076fde493b8a8bea79d2a1973c8c3d479af Mon Sep 17 00:00:00 2001 From: Stefano Sinigardi Date: Fri, 3 May 2024 15:50:31 +0200 Subject: [PATCH 5/9] Fixes #8885 (#8902) --- src/layer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/layer.c b/src/layer.c index 032a24e0f2c..758644b567b 100644 --- a/src/layer.c +++ b/src/layer.c @@ -92,9 +92,9 @@ void free_layer_custom(layer l, int keep_cudnn_desc) if (l.bias_updates) free(l.bias_updates), l.bias_updates = NULL; if (l.scales) free(l.scales), l.scales = NULL; if (l.scale_updates) free(l.scale_updates), l.scale_updates = NULL; - if (l.biases_ema) free(l.biases_ema), l.biases = NULL; - if (l.scales_ema) free(l.scales_ema), l.scales = NULL; - if (l.weights_ema) free(l.weights_ema), l.weights = NULL; + if (l.biases_ema) free(l.biases_ema), l.biases_ema = NULL; + if (l.scales_ema) free(l.scales_ema), l.scales_ema = NULL; + if (l.weights_ema) free(l.weights_ema), l.weights_ema = NULL; if (l.weights) free(l.weights), l.weights = NULL; if (l.weight_updates) free(l.weight_updates), l.weight_updates = NULL; if (l.align_bit_weights) free(l.align_bit_weights); From d02cc3a81689a1593dc5d0c8ed018d10338bebdd Mon Sep 17 00:00:00 2001 From: "Kyeongsoo Kim(KnsoF)" <56542593+K9714@users.noreply.github.com> Date: Fri, 3 May 2024 22:53:53 +0900 Subject: [PATCH 6/9] Add YOLO Layer 'nbiases' (#8893) Co-authored-by: KyeongSoo Kim --- src/yolo_layer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/yolo_layer.c b/src/yolo_layer.c index ac464ba39b4..de9d0990455 100644 --- a/src/yolo_layer.c +++ b/src/yolo_layer.c @@ -29,6 +29,7 @@ layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int l.classes = classes; l.cost = (float*)xcalloc(1, sizeof(float)); l.biases = (float*)xcalloc(total * 2, sizeof(float)); + l.nbiases = total * 2; if(mask) l.mask = mask; else{ l.mask = (int*)xcalloc(n, sizeof(int)); From d17ec15a06a9bde6e12de9354e1fde9888dd6de0 Mon Sep 17 00:00:00 2001 From: Stefano Sinigardi Date: Thu, 10 Oct 2024 09:59:36 +0200 Subject: [PATCH 7/9] update cuda to 12.6 (#8922) * update cuda to 12.6 * remove cuda from opencv2 and opencv3 --- .github/workflows/ccpp.yml | 44 ++--------- .github/workflows/on_pr.yml | 44 ++--------- .gitignore | 2 + CMakeLists.txt | 8 +- build.ps1 | 12 --- scripts/requested_cuda_version.sh | 2 +- scripts/utils.psm1 | 106 ++++++++++++++++++++----- vcpkg.json | 46 +---------- vcpkg.json.opencv23 | 126 ------------------------------ 9 files changed, 112 insertions(+), 278 deletions(-) delete mode 100644 vcpkg.json.opencv23 diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml index af7c5b9a77f..18aa31db638 100644 --- a/.github/workflows/ccpp.yml +++ b/.github/workflows/ccpp.yml @@ -147,7 +147,7 @@ jobs: path: ${{ github.workspace }}/uselib* - ubuntu-vcpkg-opencv3-cuda: + ubuntu-vcpkg-opencv3: runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v3 @@ -161,16 +161,6 @@ jobs: - name: Clean downloads run: sudo apt-get clean - - name: 'Install CUDA' - run: ${{ github.workspace }}/scripts/deploy-cuda.sh - - - name: 'Create 
softlinks for CUDA' - run: | - source ${{ github.workspace }}/scripts/requested_cuda_version.sh - sudo ln -s /usr/local/cuda-${CUDA_VERSION}/lib64/stubs/libcuda.so /usr/local/cuda-${CUDA_VERSION}/lib64/stubs/libcuda.so.1 - sudo ln -s /usr/local/cuda-${CUDA_VERSION}/lib64/stubs/libcuda.so /usr/local/cuda-${CUDA_VERSION}/lib64/libcuda.so.1 - sudo ln -s /usr/local/cuda-${CUDA_VERSION}/lib64/stubs/libcuda.so /usr/local/cuda-${CUDA_VERSION}/lib64/libcuda.so - - name: 'Setup vcpkg and NuGet artifacts backend' shell: bash run: > @@ -192,15 +182,10 @@ jobs: - name: 'Build' shell: pwsh - env: - CUDACXX: "/usr/local/cuda/bin/nvcc" - CUDA_PATH: "/usr/local/cuda" - CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda" - LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" - run: ${{ github.workspace }}/build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN -ForceOpenCVVersion 3 -DisableInteractive -DoNotUpdateTOOL + run: ${{ github.workspace }}/build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -ForceOpenCVVersion 3 -DisableInteractive -DoNotUpdateTOOL - ubuntu-vcpkg-opencv2-cuda: + ubuntu-vcpkg-opencv2: runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v3 @@ -214,16 +199,6 @@ jobs: - name: Clean downloads run: sudo apt-get clean - - name: 'Install CUDA' - run: ${{ github.workspace }}/scripts/deploy-cuda.sh - - - name: 'Create softlinks for CUDA' - run: | - source ${{ github.workspace }}/scripts/requested_cuda_version.sh - sudo ln -s /usr/local/cuda-${CUDA_VERSION}/lib64/stubs/libcuda.so /usr/local/cuda-${CUDA_VERSION}/lib64/stubs/libcuda.so.1 - sudo ln -s /usr/local/cuda-${CUDA_VERSION}/lib64/stubs/libcuda.so /usr/local/cuda-${CUDA_VERSION}/lib64/libcuda.so.1 - sudo ln -s /usr/local/cuda-${CUDA_VERSION}/lib64/stubs/libcuda.so /usr/local/cuda-${CUDA_VERSION}/lib64/libcuda.so - - name: 'Setup vcpkg and NuGet artifacts backend' shell: bash run: > @@ -245,12 +220,7 @@ jobs: - name: 'Build' shell: pwsh - env: - CUDACXX: "/usr/local/cuda/bin/nvcc" - CUDA_PATH: "/usr/local/cuda" - CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda" - LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" - run: ${{ github.workspace }}/build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN -ForceOpenCVVersion 2 -DisableInteractive -DoNotUpdateTOOL + run: ${{ github.workspace }}/build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -ForceOpenCVVersion 2 -DisableInteractive -DoNotUpdateTOOL ubuntu: @@ -693,9 +663,9 @@ jobs: - name: 'Build' env: - CUDA_PATH: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.2" - CUDA_TOOLKIT_ROOT_DIR: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.2" - CUDACXX: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.2\\bin\\nvcc.exe" + CUDA_PATH: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.6" + CUDA_TOOLKIT_ROOT_DIR: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.6" + CUDACXX: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.6\\bin\\nvcc.exe" shell: pwsh run: ${{ github.workspace }}/build.ps1 -EnableCUDA -DisableInteractive -DoNotUpdateTOOL diff --git a/.github/workflows/on_pr.yml b/.github/workflows/on_pr.yml index 409523fd7e3..1ad990ba91a 100644 --- a/.github/workflows/on_pr.yml +++ b/.github/workflows/on_pr.yml @@ -106,7 +106,7 @@ jobs: run: ${{ github.workspace }}/build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN -DisableInteractive -DoNotUpdateTOOL - ubuntu-vcpkg-opencv3-cuda: + ubuntu-vcpkg-opencv3: runs-on: 
ubuntu-20.04 steps: - uses: actions/checkout@v3 @@ -120,16 +120,6 @@ jobs: - name: Clean downloads run: sudo apt-get clean - - name: 'Install CUDA' - run: ${{ github.workspace }}/scripts/deploy-cuda.sh - - - name: 'Create softlinks for CUDA' - run: | - source ${{ github.workspace }}/scripts/requested_cuda_version.sh - sudo ln -s /usr/local/cuda-${CUDA_VERSION}/lib64/stubs/libcuda.so /usr/local/cuda-${CUDA_VERSION}/lib64/stubs/libcuda.so.1 - sudo ln -s /usr/local/cuda-${CUDA_VERSION}/lib64/stubs/libcuda.so /usr/local/cuda-${CUDA_VERSION}/lib64/libcuda.so.1 - sudo ln -s /usr/local/cuda-${CUDA_VERSION}/lib64/stubs/libcuda.so /usr/local/cuda-${CUDA_VERSION}/lib64/libcuda.so - - name: 'Setup vcpkg and NuGet artifacts backend' shell: bash run: > @@ -141,15 +131,10 @@ jobs: - name: 'Build' shell: pwsh - env: - CUDACXX: "/usr/local/cuda/bin/nvcc" - CUDA_PATH: "/usr/local/cuda" - CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda" - LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" - run: ${{ github.workspace }}/build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN -ForceOpenCVVersion 3 -DisableInteractive -DoNotUpdateTOOL + run: ${{ github.workspace }}/build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -ForceOpenCVVersion 3 -DisableInteractive -DoNotUpdateTOOL - ubuntu-vcpkg-opencv2-cuda: + ubuntu-vcpkg-opencv2: runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v3 @@ -163,16 +148,6 @@ jobs: - name: Clean downloads run: sudo apt-get clean - - name: 'Install CUDA' - run: ${{ github.workspace }}/scripts/deploy-cuda.sh - - - name: 'Create softlinks for CUDA' - run: | - source ${{ github.workspace }}/scripts/requested_cuda_version.sh - sudo ln -s /usr/local/cuda-${CUDA_VERSION}/lib64/stubs/libcuda.so /usr/local/cuda-${CUDA_VERSION}/lib64/stubs/libcuda.so.1 - sudo ln -s /usr/local/cuda-${CUDA_VERSION}/lib64/stubs/libcuda.so /usr/local/cuda-${CUDA_VERSION}/lib64/libcuda.so.1 - sudo ln -s /usr/local/cuda-${CUDA_VERSION}/lib64/stubs/libcuda.so /usr/local/cuda-${CUDA_VERSION}/lib64/libcuda.so - - name: 'Setup vcpkg and NuGet artifacts backend' shell: bash run: > @@ -184,12 +159,7 @@ jobs: - name: 'Build' shell: pwsh - env: - CUDACXX: "/usr/local/cuda/bin/nvcc" - CUDA_PATH: "/usr/local/cuda" - CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda" - LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" - run: ${{ github.workspace }}/build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN -ForceOpenCVVersion 2 -DisableInteractive -DoNotUpdateTOOL + run: ${{ github.workspace }}/build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -ForceOpenCVVersion 2 -DisableInteractive -DoNotUpdateTOOL ubuntu: @@ -435,9 +405,9 @@ jobs: - name: 'Build' env: - CUDA_PATH: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.2" - CUDA_TOOLKIT_ROOT_DIR: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.2" - CUDACXX: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.2\\bin\\nvcc.exe" + CUDA_PATH: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.6" + CUDA_TOOLKIT_ROOT_DIR: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.6" + CUDACXX: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.6\\bin\\nvcc.exe" shell: pwsh run: ${{ github.workspace }}/build.ps1 -EnableCUDA -DisableInteractive -DoNotUpdateTOOL diff --git a/.gitignore b/.gitignore index bb62a6060b2..3166d82c7b3 100644 --- a/.gitignore +++ b/.gitignore @@ -41,6 +41,8 @@ build/detect_cuda_compute_capabilities.cu build/.ninja_deps 
build/.ninja_log build/Makefile +CMakeFiles/ +CMakeCache.txt */vcpkg-manifest-install.log build.log __pycache__/ diff --git a/CMakeLists.txt b/CMakeLists.txt index 4710e39def8..fc34cc1e5a5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,17 +52,17 @@ if(ENABLE_OPENCV) if(VCPKG_USE_OPENCV4) list(APPEND VCPKG_MANIFEST_FEATURES "opencv-cuda") elseif(VCPKG_USE_OPENCV3) - list(APPEND VCPKG_MANIFEST_FEATURES "opencv3-cuda") + list(APPEND VCPKG_MANIFEST_FEATURES "opencv3") elseif(VCPKG_USE_OPENCV2) - list(APPEND VCPKG_MANIFEST_FEATURES "opencv2-cuda") + list(APPEND VCPKG_MANIFEST_FEATURES "opencv2") endif() else() if(VCPKG_USE_OPENCV4) list(APPEND VCPKG_MANIFEST_FEATURES "opencv-base") elseif(VCPKG_USE_OPENCV3) - list(APPEND VCPKG_MANIFEST_FEATURES "opencv3-base") + list(APPEND VCPKG_MANIFEST_FEATURES "opencv3") elseif(VCPKG_USE_OPENCV2) - list(APPEND VCPKG_MANIFEST_FEATURES "opencv2-base") + list(APPEND VCPKG_MANIFEST_FEATURES "opencv2") endif() endif() endif() diff --git a/build.ps1 b/build.ps1 index de8916d94a6..07ecf1f17a8 100755 --- a/build.ps1 +++ b/build.ps1 @@ -907,21 +907,14 @@ if ($BuildInstaller) { if (($ForceOpenCVVersion -eq 2) -and $UseVCPKG) { Write-Host "You requested OpenCV version 2, so vcpkg will install that version" -ForegroundColor Yellow - Write-Host "This requires using vcpkg.json.opencv23 as manifest file" -ForegroundColor Yellow $AdditionalBuildSetup = $AdditionalBuildSetup + " -DVCPKG_USE_OPENCV4=OFF -DVCPKG_USE_OPENCV2=ON" } if (($ForceOpenCVVersion -eq 3) -and $UseVCPKG) { Write-Host "You requested OpenCV version 3, so vcpkg will install that version" -ForegroundColor Yellow - Write-Host "This requires using vcpkg.json.opencv23 as manifest file" -ForegroundColor Yellow $AdditionalBuildSetup = $AdditionalBuildSetup + " -DVCPKG_USE_OPENCV4=OFF -DVCPKG_USE_OPENCV3=ON" } -if($ForceOpenCVVersion -gt 0) { - Move-Item $PSCustomScriptRoot/vcpkg.json $PSCustomScriptRoot/vcpkg.json.bak - Move-Item $PSCustomScriptRoot/vcpkg.json.opencv23 $PSCustomScriptRoot/vcpkg.json -} - if ($UseVCPKG -and $ForceVCPKGCacheRemoval) { if ($IsWindows -or $IsWindowsPowerShell) { $vcpkgbinarycachepath = "$env:LOCALAPPDATA/vcpkg/archive" @@ -1175,11 +1168,6 @@ if ($vcpkg_host_triplet_set_by_this_script) { $env:VCPKG_DEFAULT_HOST_TRIPLET = $null } -if($ForceOpenCVVersion -gt 0) { - Move-Item $PSCustomScriptRoot/vcpkg.json $PSCustomScriptRoot/vcpkg.json.opencv23 - Move-Item $PSCustomScriptRoot/vcpkg.json.bak $PSCustomScriptRoot/vcpkg.json -} - if ($vcpkg_branch_set_by_this_script) { Push-Location $vcpkg_path $git_args = "checkout -" diff --git a/scripts/requested_cuda_version.sh b/scripts/requested_cuda_version.sh index ae9ea2a4f55..8cb3f6ad9d6 100755 --- a/scripts/requested_cuda_version.sh +++ b/scripts/requested_cuda_version.sh @@ -1,4 +1,4 @@ #!/usr/bin/env bash -export CUDA_VERSION="12.2" +export CUDA_VERSION="12.6" export CUDA_VERSION_DASHED="${CUDA_VERSION//./-}" diff --git a/scripts/utils.psm1 b/scripts/utils.psm1 index 6e22056957c..cf4f0dccf64 100644 --- a/scripts/utils.psm1 +++ b/scripts/utils.psm1 @@ -22,7 +22,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#> -$utils_psm1_version = "1.2.3" +$utils_psm1_version = "1.4.1" $IsWindowsPowerShell = switch ( $PSVersionTable.PSVersion.Major ) { 5 { $true } 4 { $true } @@ -38,6 +38,31 @@ if ($IsWindowsPowerShell -or $IsWindows) { $64bitPwsh = $([Environment]::Is64BitProcess) $64bitOS = $([Environment]::Is64BitOperatingSystem) +$osArchitecture = [System.Runtime.InteropServices.RuntimeInformation]::OSArchitecture +switch ($osArchitecture) { + "X86" { + $vcpkgArchitecture = "x86" + $vsArchitecture = "Win32" + } + "X64" { + $vcpkgArchitecture = "x64" + $vsArchitecture = "x64" + } + "Arm" { + $vcpkgArchitecture = "arm" + $vsArchitecture = "arm" + } + "Arm64" { + $vcpkgArchitecture = "arm64" + $vsArchitecture = "arm64" + } + default { + $vcpkgArchitecture = "x64" + $vsArchitecture = "x64" + Write-Output "Unknown architecture. Trying x64" + } +} + Push-Location $PSScriptRoot $GIT_EXE = Get-Command "git" -ErrorAction SilentlyContinue | Select-Object -ExpandProperty Definition @@ -55,11 +80,36 @@ else { } Pop-Location -$cuda_version_full = "12.2.0" -$cuda_version_short = "12.2" +$cuda_version_full = "12.6.2" +$cuda_version_short = "12.6" $cuda_version_full_dashed = $cuda_version_full.replace('.', '-') $cuda_version_short_dashed = $cuda_version_short.replace('.', '-') +function activateVenv([string]$VenvPath) { + if ($IsWindowsPowerShell -or $IsWindows) { + $activate_script = "$VenvPath/Scripts/Activate.ps1" + } + else { + $activate_script = "$VenvPath/bin/Activate.ps1" + } + + $activate_script = Resolve-Path $activate_script + $VenvPath = Resolve-Path $VenvPath + + if ($env:VIRTUAL_ENV -eq $VenvPath) { + Write-Host "Venv already activated" + return + } + else { + Write-Host "Activating venv" + if (-Not (Test-Path $activate_script)) { + MyThrow("Could not find activate script at $activate_script") + } + & $activate_script + } +} + + function getProgramFiles32bit() { $out = ${env:PROGRAMFILES(X86)} if ($null -eq $out) { @@ -167,21 +217,34 @@ function getLatestVisualStudioWithDesktopWorkloadVersion([bool]$required = $true return $installationVersion } -function setupVisualStudio([bool]$required = $true) { +function setupVisualStudio([bool]$required = $true, [bool]$enable_clang = $false) { $CL_EXE = Get-Command "cl" -ErrorAction SilentlyContinue | Select-Object -ExpandProperty Definition - if ((-Not $CL_EXE) -or ($CL_EXE -match "HostX86\\x86") -or ($CL_EXE -match "HostX64\\x86")) { - $vsfound = getLatestVisualStudioWithDesktopWorkloadPath - Write-Host "Found VS in ${vsfound}" - Push-Location "${vsfound}/Common7/Tools" - cmd.exe /c "VsDevCmd.bat -arch=x64 & set" | - ForEach-Object { - if ($_ -match "=") { - $v = $_.split("="); Set-Item -force -path "ENV:\$($v[0])" -value "$($v[1])" + if (-Not $CL_EXE) { + $vsfound = getLatestVisualStudioWithDesktopWorkloadPath($required) + if (-Not $vsfound) { + if ($required) { + MyThrow("Could not locate any installation of Visual Studio") + } + else { + Write-Host "Could not locate any installation of Visual Studio" -ForegroundColor Red + return + } + } + else { + Write-Host "Found VS in ${vsfound}" + Push-Location "${vsfound}/Common7/Tools" + cmd.exe /c "VsDevCmd.bat -arch=${vsArchitecture} & set" | + ForEach-Object { + if ($_ -match "=") { + $v = $_.split("="); Set-Item -force -path "ENV:\$($v[0])" -value "$($v[1])" + } + } + Pop-Location + if ($enable_clang) { + $env:PATH = "${vsfound}/VC/Tools/Llvm/${vsArchitecture}/bin;$env:PATH" } + Write-Host "Visual Studio Command Prompt variables set" } - Pop-Location - $env:PATH = "${vsfound}/VC/Tools/Llvm/x64/bin;$env:PATH" - 
Write-Host "Visual Studio Command Prompt variables set" } } @@ -190,13 +253,13 @@ function DownloadNinja() { Remove-Item -Force -Recurse -ErrorAction SilentlyContinue ninja Remove-Item -Force -ErrorAction SilentlyContinue ninja.zip if ($IsWindows -or $IsWindowsPowerShell) { - $url = "https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-win.zip" + $url = "https://github.com/ninja-build/ninja/releases/download/v1.12.1/ninja-win.zip" } elseif ($IsLinux) { - $url = "https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-linux.zip" + $url = "https://github.com/ninja-build/ninja/releases/download/v1.12.1/ninja-linux.zip" } elseif ($IsMacOS) { - $url = "https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-mac.zip" + $url = "https://github.com/ninja-build/ninja/releases/download/v1.12.1/ninja-mac.zip" } else { MyThrow("Unknown OS, unsupported") @@ -247,7 +310,7 @@ function DownloadAria2() { } function DownloadLicencpp() { - $licencpp_version = "0.2.1" + $licencpp_version = "0.2.5" Write-Host "Downloading a portable version of licencpp v${licencpp_version}" -ForegroundColor Yellow if ($IsWindows -or $IsWindowsPowerShell) { $basename = "licencpp-Windows" @@ -433,6 +496,11 @@ Export-ModuleMember -Variable cuda_version_full Export-ModuleMember -Variable cuda_version_short Export-ModuleMember -Variable cuda_version_full_dashed Export-ModuleMember -Variable cuda_version_short_dashed +Export-ModuleMember -Variable osArchitecture +Export-ModuleMember -Variable vcpkgArchitecture +Export-ModuleMember -Variable vsArchitecture +Export-ModuleMember -Variable ExecutableSuffix +Export-ModuleMember -Function activateVenv Export-ModuleMember -Function getProgramFiles32bit Export-ModuleMember -Function getLatestVisualStudioWithDesktopWorkloadPath Export-ModuleMember -Function getLatestVisualStudioWithDesktopWorkloadVersion diff --git a/vcpkg.json b/vcpkg.json index 6f142bed231..752d0fc3669 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -73,54 +73,16 @@ } ] }, - "opencv2-base": { + "opencv2": { "description": "Build darknet with support for OpenCV2", "dependencies": [ - { - "name": "opencv2", - "features": [ - "ffmpeg" - ] - } - ] - }, - "opencv2-cuda": { - "description": "Build darknet with support for CUDA-enabled OpenCV2", - "dependencies": [ - { - "name": "opencv2", - "features": [ - "cuda", - "ffmpeg" - ] - } + "opencv2" ] }, - "opencv3-base": { + "opencv3": { "description": "Build darknet with support for OpenCV3", "dependencies": [ - { - "name": "opencv3", - "features": [ - "contrib", - "dnn", - "ffmpeg" - ] - } - ] - }, - "opencv3-cuda": { - "description": "Build darknet with support for CUDA-enabled OpenCV3", - "dependencies": [ - { - "name": "opencv3", - "features": [ - "contrib", - "cuda", - "dnn", - "ffmpeg" - ] - } + "opencv3" ] } } diff --git a/vcpkg.json.opencv23 b/vcpkg.json.opencv23 deleted file mode 100644 index 0a45be9a456..00000000000 --- a/vcpkg.json.opencv23 +++ /dev/null @@ -1,126 +0,0 @@ -{ - "name": "darknet", - "version": "0.2.5.7", - "description": "Darknet is an open source neural network framework written in C and CUDA. 
You only look once (YOLO) is a state-of-the-art, real-time object detection system, best example of darknet functionalities.", - "homepage": "https://github.com/alexeyab/darknet", - "dependencies": [ - { - "name": "getopt", - "platform": "windows & !mingw" - }, - "pthreads", - "stb" - ], - "features": { - "cuda": { - "description": "Build darknet with support for CUDA", - "dependencies": [ - "cuda" - ] - }, - "cudnn": { - "description": "Build darknet with support for cuDNN", - "dependencies": [ - "cuda", - "cudnn" - ] - }, - "full": { - "description": "Build darknet fully featured", - "dependencies": [ - { - "name": "darknet", - "features": [ - "cuda", - "cudnn", - "opencv-cuda" - ] - } - ] - }, - "opencv-base": { - "description": "Build darknet with support for latest version of OpenCV", - "dependencies": [ - { - "name": "opencv", - "features": [ - "contrib", - "dnn", - "ffmpeg" - ] - } - ] - }, - "opencv-cuda": { - "description": "Build darknet with support for latest version of CUDA-enabled OpenCV", - "dependencies": [ - { - "name": "opencv", - "features": [ - "contrib", - "cuda", - "dnn", - "ffmpeg" - ] - } - ] - }, - "opencv2-base": { - "description": "Build darknet with support for OpenCV2", - "dependencies": [ - { - "name": "opencv2", - "features": [ - "ffmpeg" - ] - } - ] - }, - "opencv2-cuda": { - "description": "Build darknet with support for CUDA-enabled OpenCV2", - "dependencies": [ - { - "name": "opencv2", - "features": [ - "cuda", - "ffmpeg" - ] - } - ] - }, - "opencv3-base": { - "description": "Build darknet with support for OpenCV3", - "dependencies": [ - { - "name": "opencv3", - "features": [ - "contrib", - "dnn", - "ffmpeg" - ] - } - ] - }, - "opencv3-cuda": { - "description": "Build darknet with support for CUDA-enabled OpenCV3", - "dependencies": [ - { - "name": "opencv3", - "features": [ - "contrib", - "cuda", - "dnn", - "ffmpeg" - ] - } - ] - } - }, - "overrides": [ - { - "name": "ffmpeg", - "version": "4.4.3" - } - ], - "builtin-baseline": "54cc53c43430c73f489e52af5fadd032c1aced16" -} From 19dde2f296941a75b0b9202cccd59528bde7f65a Mon Sep 17 00:00:00 2001 From: Stefano Sinigardi Date: Tue, 22 Oct 2024 18:17:16 +0200 Subject: [PATCH 8/9] fix runtime libs (#8924) --- CMakeLists.txt | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fc34cc1e5a5..8c65eeff301 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,6 +22,7 @@ option(VCPKG_USE_OPENCV2 "Use legacy OpenCV 2" OFF) option(VCPKG_USE_OPENCV3 "Use legacy OpenCV 3" OFF) option(VCPKG_USE_OPENCV4 "Use OpenCV 4" ON) option(USE_NSIS "Use NSIS as a CPack backend on Windows" ON) +option(SKIP_INSTALL_RUNTIME_LIBS "Do not install runtime libs" OFF) if(DEFINED ENV{VCPKG_DEFAULT_TRIPLET}) message(STATUS "Setting default vcpkg target triplet to $ENV{VCPKG_DEFAULT_TRIPLET}") @@ -67,11 +68,7 @@ if(ENABLE_OPENCV) endif() endif() -if(NOT CMAKE_HOST_SYSTEM_PROCESSOR AND NOT WIN32) - execute_process(COMMAND "uname" "-m" OUTPUT_VARIABLE CMAKE_HOST_SYSTEM_PROCESSOR OUTPUT_STRIP_TRAILING_WHITESPACE) -endif() - -if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "x86") +if(CMAKE_SYSTEM_PROCESSOR MATCHES "^x86" OR CMAKE_SYSTEM_PROCESSOR MATCHES "^AMD64") set(IS_X86 TRUE) else() set(IS_X86 FALSE) @@ -672,13 +669,14 @@ if(ENABLE_CSHARP_WRAPPER) add_subdirectory(src/csharp) endif() -set(CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_SKIP TRUE) -include(InstallRequiredSystemLibraries) - -install( +if (NOT SKIP_INSTALL_RUNTIME_LIBS) + set(CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_SKIP TRUE) + 
include(InstallRequiredSystemLibraries) + install( PROGRAMS ${CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS} DESTINATION ${INSTALL_BIN_DIR} -) + ) +endif() if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/vcpkg.json) file(READ ${CMAKE_CURRENT_SOURCE_DIR}/vcpkg.json VCPKG_JSON_STRING) From 9ade741db91fd3d796d2abb0c9889b10943ea28a Mon Sep 17 00:00:00 2001 From: Stefano Sinigardi Date: Wed, 6 Nov 2024 15:54:59 +0100 Subject: [PATCH 9/9] update stb (#8926) --- 3rdparty/stb/include/stb_image.h | 355 ++++++++++++++++--------------- 1 file changed, 178 insertions(+), 177 deletions(-) diff --git a/3rdparty/stb/include/stb_image.h b/3rdparty/stb/include/stb_image.h index 5e807a0a6e7..9eedabedc45 100644 --- a/3rdparty/stb/include/stb_image.h +++ b/3rdparty/stb/include/stb_image.h @@ -1,4 +1,4 @@ -/* stb_image - v2.28 - public domain image loader - http://nothings.org/stb +/* stb_image - v2.30 - public domain image loader - http://nothings.org/stb no warranty implied; use at your own risk Do this: @@ -48,6 +48,8 @@ LICENSE RECENT REVISION HISTORY: + 2.30 (2024-05-31) avoid erroneous gcc warning + 2.29 (2023-05-xx) optimizations 2.28 (2023-01-29) many error fixes, security errors, just tons of stuff 2.27 (2021-07-11) document stbi_info better, 16-bit PNM support, bug fixes 2.26 (2020-07-13) many minor fixes @@ -1072,8 +1074,8 @@ static int stbi__addints_valid(int a, int b) return a <= INT_MAX - b; } -// returns 1 if the product of two signed shorts is valid, 0 on overflow. -static int stbi__mul2shorts_valid(short a, short b) +// returns 1 if the product of two ints fits in a signed short, 0 on overflow. +static int stbi__mul2shorts_valid(int a, int b) { if (b == 0 || b == -1) return 1; // multiplication by 0 is always 0; check for -1 so SHRT_MIN/b doesn't overflow if ((a >= 0) == (b >= 0)) return a <= SHRT_MAX/b; // product is positive, so similar to mul2sizes_valid @@ -3384,13 +3386,13 @@ static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan) return 1; } -static int stbi__skip_jpeg_junk_at_end(stbi__jpeg *j) +static stbi_uc stbi__skip_jpeg_junk_at_end(stbi__jpeg *j) { // some JPEGs have junk at end, skip over it but if we find what looks // like a valid marker, resume there while (!stbi__at_eof(j->s)) { - int x = stbi__get8(j->s); - while (x == 255) { // might be a marker + stbi_uc x = stbi__get8(j->s); + while (x == 0xff) { // might be a marker if (stbi__at_eof(j->s)) return STBI__MARKER_none; x = stbi__get8(j->s); if (x != 0x00 && x != 0xff) { @@ -4176,6 +4178,7 @@ typedef struct { stbi_uc *zbuffer, *zbuffer_end; int num_bits; + int hit_zeof_once; stbi__uint32 code_buffer; char *zout; @@ -4242,9 +4245,20 @@ stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z) int b,s; if (a->num_bits < 16) { if (stbi__zeof(a)) { - return -1; /* report error for unexpected end of data. */ + if (!a->hit_zeof_once) { + // This is the first time we hit eof, insert 16 extra padding btis + // to allow us to keep going; if we actually consume any of them + // though, that is invalid data. This is caught later. + a->hit_zeof_once = 1; + a->num_bits += 16; // add 16 implicit zero bits + } else { + // We already inserted our extra 16 padding bits and are again + // out, this stream is actually prematurely terminated. 
+            return -1;
+         }
+      } else {
+         stbi__fill_bits(a);
       }
-      stbi__fill_bits(a);
    }
    b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
    if (b) {
@@ -4309,6 +4323,13 @@ static int stbi__parse_huffman_block(stbi__zbuf *a)
          int len,dist;
          if (z == 256) {
             a->zout = zout;
+            if (a->hit_zeof_once && a->num_bits < 16) {
+               // The first time we hit zeof, we inserted 16 extra zero bits into our bit
+               // buffer so the decoder can just do its speculative decoding. But if we
+               // actually consumed any of those bits (which is the case when num_bits < 16),
+               // the stream actually read past the end so it is malformed.
+               return stbi__err("unexpected end","Corrupt PNG");
+            }
             return 1;
          }
          if (z >= 286) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, length codes 286 and 287 must not appear in compressed data
@@ -4320,7 +4341,7 @@ static int stbi__parse_huffman_block(stbi__zbuf *a)
          dist = stbi__zdist_base[z];
          if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
          if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG");
-         if (zout + len > a->zout_end) {
+         if (len > a->zout_end - zout) {
             if (!stbi__zexpand(a, zout, len)) return 0;
             zout = a->zout;
          }
@@ -4464,6 +4485,7 @@ static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
       if (!stbi__parse_zlib_header(a)) return 0;
    a->num_bits = 0;
    a->code_buffer = 0;
+   a->hit_zeof_once = 0;
    do {
       final = stbi__zreceive(a,1);
       type = stbi__zreceive(a,2);
@@ -4619,9 +4641,8 @@ enum {
    STBI__F_up=2,
    STBI__F_avg=3,
    STBI__F_paeth=4,
-   // synthetic filters used for first scanline to avoid needing a dummy row of 0s
-   STBI__F_avg_first,
-   STBI__F_paeth_first
+   // synthetic filter used for first scanline to avoid needing a dummy row of 0s
+   STBI__F_avg_first
 };
 
 static stbi_uc first_row_filter[5] =
@@ -4630,29 +4651,56 @@ static stbi_uc first_row_filter[5] =
    STBI__F_sub,
    STBI__F_none,
    STBI__F_avg_first,
-   STBI__F_paeth_first
+   STBI__F_sub  // Paeth with b=c=0 turns out to be equivalent to sub
 };
 
 static int stbi__paeth(int a, int b, int c)
 {
-   int p = a + b - c;
-   int pa = abs(p-a);
-   int pb = abs(p-b);
-   int pc = abs(p-c);
-   if (pa <= pb && pa <= pc) return a;
-   if (pb <= pc) return b;
-   return c;
+   // This formulation looks very different from the reference in the PNG spec, but is
+   // actually equivalent and has favorable data dependencies and admits straightforward
+   // generation of branch-free code, which helps performance significantly.
+   int thresh = c*3 - (a + b);
+   int lo = a < b ? a : b;
+   int hi = a < b ? b : a;
+   int t0 = (hi <= thresh) ? lo : c;
+   int t1 = (thresh <= lo) ? hi : t0;
+   return t1;
 }
 
 static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 };
 
+// adds an extra all-255 alpha channel
+// dest == src is legal
+// img_n must be 1 or 3
+static void stbi__create_png_alpha_expand8(stbi_uc *dest, stbi_uc *src, stbi__uint32 x, int img_n)
+{
+   int i;
+   // must process data backwards since we allow dest==src
+   if (img_n == 1) {
+      for (i=x-1; i >= 0; --i) {
+         dest[i*2+1] = 255;
+         dest[i*2+0] = src[i];
+      }
+   } else {
+      STBI_ASSERT(img_n == 3);
+      for (i=x-1; i >= 0; --i) {
+         dest[i*4+3] = 255;
+         dest[i*4+2] = src[i*3+2];
+         dest[i*4+1] = src[i*3+1];
+         dest[i*4+0] = src[i*3+0];
+      }
+   }
+}
+
 // create the png data from post-deflated data
 static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color)
 {
-   int bytes = (depth == 16? 2 : 1);
+   int bytes = (depth == 16 ? 2 : 1);
    stbi__context *s = a->s;
    stbi__uint32 i,j,stride = x*out_n*bytes;
    stbi__uint32 img_len, img_width_bytes;
+   stbi_uc *filter_buf;
+   int all_ok = 1;
    int k;
    int img_n = s->img_n; // copy it into a local for later
@@ -4664,8 +4712,11 @@ static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 r
 
    a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into
    if (!a->out) return stbi__err("outofmem", "Out of memory");
+   // note: error exits here don't need to clean up a->out individually,
+   // stbi__do_png always does on error.
    if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG");
    img_width_bytes = (((img_n * x * depth) + 7) >> 3);
+   if (!stbi__mad2sizes_valid(img_width_bytes, y, img_width_bytes)) return stbi__err("too large", "Corrupt PNG");
    img_len = (img_width_bytes + 1) * y;
 
    // we used to check for exact match between raw_len and img_len on non-interlaced PNGs,
@@ -4673,189 +4724,137 @@ static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 r
    // so just check for raw_len < img_len always.
    if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG");
 
+   // Allocate two scan lines worth of filter workspace buffer.
+   filter_buf = (stbi_uc *) stbi__malloc_mad2(img_width_bytes, 2, 0);
+   if (!filter_buf) return stbi__err("outofmem", "Out of memory");
+
+   // Filtering for low-bit-depth images
+   if (depth < 8) {
+      filter_bytes = 1;
+      width = img_width_bytes;
+   }
+
    for (j=0; j < y; ++j) {
-      stbi_uc *cur = a->out + stride*j;
-      stbi_uc *prior;
+      // cur/prior filter buffers alternate
+      stbi_uc *cur = filter_buf + (j & 1)*img_width_bytes;
+      stbi_uc *prior = filter_buf + (~j & 1)*img_width_bytes;
+      stbi_uc *dest = a->out + stride*j;
+      int nk = width * filter_bytes;
       int filter = *raw++;
 
-      if (filter > 4)
-         return stbi__err("invalid filter","Corrupt PNG");
-
-      if (depth < 8) {
-         if (img_width_bytes > x) return stbi__err("invalid width","Corrupt PNG");
-         cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place
-         filter_bytes = 1;
-         width = img_width_bytes;
+      // check filter type
+      if (filter > 4) {
+         all_ok = stbi__err("invalid filter","Corrupt PNG");
+         break;
       }
-      prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above
 
       // if first row, use special filter that doesn't sample previous row
       if (j == 0) filter = first_row_filter[filter];
 
-      // handle first byte explicitly
-      for (k=0; k < filter_bytes; ++k) {
-         switch (filter) {
-            case STBI__F_none       : cur[k] = raw[k]; break;
-            case STBI__F_sub        : cur[k] = raw[k]; break;
-            case STBI__F_up         : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
-            case STBI__F_avg        : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break;
-            case STBI__F_paeth      : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break;
-            case STBI__F_avg_first  : cur[k] = raw[k]; break;
-            case STBI__F_paeth_first: cur[k] = raw[k]; break;
-         }
-      }
-
-      if (depth == 8) {
-         if (img_n != out_n)
-            cur[img_n] = 255; // first pixel
-         raw += img_n;
-         cur += out_n;
-         prior += out_n;
-      } else if (depth == 16) {
-         if (img_n != out_n) {
-            cur[filter_bytes] = 255; // first pixel top byte
-            cur[filter_bytes+1] = 255; // first pixel bottom byte
-         }
-         raw += filter_bytes;
-         cur += output_bytes;
-         prior += output_bytes;
-      } else {
-         raw += 1;
-         cur += 1;
-         prior += 1;
+      // perform actual filtering
+      switch (filter) {
+      case STBI__F_none:
+         memcpy(cur, raw, nk);
+         break;
+      case STBI__F_sub:
+         memcpy(cur, raw, filter_bytes);
+         for (k = filter_bytes; k < nk; ++k)
+            cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]);
+         break;
+      case STBI__F_up:
+         for (k = 0; k < nk; ++k)
+            cur[k] = STBI__BYTECAST(raw[k] + prior[k]);
+         break;
+      case STBI__F_avg:
+         for (k = 0; k < filter_bytes; ++k)
+            cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1));
+         for (k = filter_bytes; k < nk; ++k)
+            cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1));
+         break;
+      case STBI__F_paeth:
+         for (k = 0; k < filter_bytes; ++k)
+            cur[k] = STBI__BYTECAST(raw[k] + prior[k]); // prior[k] == stbi__paeth(0,prior[k],0)
+         for (k = filter_bytes; k < nk; ++k)
+            cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes], prior[k], prior[k-filter_bytes]));
+         break;
+      case STBI__F_avg_first:
+         memcpy(cur, raw, filter_bytes);
+         for (k = filter_bytes; k < nk; ++k)
+            cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1));
+         break;
       }
 
-      // this is a little gross, so that we don't switch per-pixel or per-component
-      if (depth < 8 || img_n == out_n) {
-         int nk = (width - 1)*filter_bytes;
-         #define STBI__CASE(f) \
-             case f:     \
-                for (k=0; k < nk; ++k)
-         switch (filter) {
-            // "none" filter turns into a memcpy here; make that explicit.
-            case STBI__F_none:         memcpy(cur, raw, nk); break;
-            STBI__CASE(STBI__F_sub)          { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break;
-            STBI__CASE(STBI__F_up)           { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
-            STBI__CASE(STBI__F_avg)          { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break;
-            STBI__CASE(STBI__F_paeth)        { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break;
-            STBI__CASE(STBI__F_avg_first)    { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break;
-            STBI__CASE(STBI__F_paeth_first)  { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break;
-         }
-         #undef STBI__CASE
-         raw += nk;
-      } else {
-         STBI_ASSERT(img_n+1 == out_n);
-         #define STBI__CASE(f) \
-             case f:     \
-                for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
-                   for (k=0; k < filter_bytes; ++k)
-         switch (filter) {
-            STBI__CASE(STBI__F_none)         { cur[k] = raw[k]; } break;
-            STBI__CASE(STBI__F_sub)          { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break;
-            STBI__CASE(STBI__F_up)           { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
-            STBI__CASE(STBI__F_avg)          { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break;
-            STBI__CASE(STBI__F_paeth)        { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break;
-            STBI__CASE(STBI__F_avg_first)    { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break;
-            STBI__CASE(STBI__F_paeth_first)  { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break;
-         }
-         #undef STBI__CASE
-
-         // the loop above sets the high byte of the pixels' alpha, but for
-         // 16 bit png files we also need the low byte set. we'll do that here.
-         if (depth == 16) {
-            cur = a->out + stride*j; // start at the beginning of the row again
-            for (i=0; i < x; ++i,cur+=output_bytes) {
-               cur[filter_bytes+1] = 255;
-            }
-         }
-      }
-   }
+      raw += nk;
 
-   // we make a separate pass to expand bits to pixels; for performance,
-   // this could run two scanlines behind the above code, so it won't
-   // intefere with filtering but will still be in the cache.
-   if (depth < 8) {
-      for (j=0; j < y; ++j) {
-         stbi_uc *cur = a->out + stride*j;
-         stbi_uc *in  = a->out + stride*j + x*out_n - img_width_bytes;
-         // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit
-         // png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop
+      // expand decoded bits in cur to dest, also adding an extra alpha channel if desired
+      if (depth < 8) {
          stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range
+         stbi_uc *in = cur;
+         stbi_uc *out = dest;
+         stbi_uc inb = 0;
+         stbi__uint32 nsmp = x*img_n;
 
-         // note that the final byte might overshoot and write more data than desired.
-         // we can allocate enough data that this never writes out of memory, but it
-         // could also overwrite the next scanline. can it overwrite non-empty data
-         // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel.
-         // so we need to explicitly clamp the final ones
-
+         // expand bits to bytes first
          if (depth == 4) {
-            for (k=x*img_n; k >= 2; k-=2, ++in) {
-               *cur++ = scale * ((*in >> 4)       );
-               *cur++ = scale * ((*in     ) & 0x0f);
+            for (i=0; i < nsmp; ++i) {
+               if ((i & 1) == 0) inb = *in++;
+               *out++ = scale * (inb >> 4);
+               inb <<= 4;
             }
-            if (k > 0) *cur++ = scale * ((*in >> 4)       );
          } else if (depth == 2) {
-            for (k=x*img_n; k >= 4; k-=4, ++in) {
-               *cur++ = scale * ((*in >> 6)       );
-               *cur++ = scale * ((*in >> 4) & 0x03);
-               *cur++ = scale * ((*in >> 2) & 0x03);
-               *cur++ = scale * ((*in     ) & 0x03);
+            for (i=0; i < nsmp; ++i) {
+               if ((i & 3) == 0) inb = *in++;
+               *out++ = scale * (inb >> 6);
+               inb <<= 2;
             }
-            if (k > 0) *cur++ = scale * ((*in >> 6)       );
-            if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03);
-            if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03);
-         } else if (depth == 1) {
-            for (k=x*img_n; k >= 8; k-=8, ++in) {
-               *cur++ = scale * ((*in >> 7)       );
-               *cur++ = scale * ((*in >> 6) & 0x01);
-               *cur++ = scale * ((*in >> 5) & 0x01);
-               *cur++ = scale * ((*in >> 4) & 0x01);
-               *cur++ = scale * ((*in >> 3) & 0x01);
-               *cur++ = scale * ((*in >> 2) & 0x01);
-               *cur++ = scale * ((*in >> 1) & 0x01);
-               *cur++ = scale * ((*in     ) & 0x01);
+         } else {
+            STBI_ASSERT(depth == 1);
+            for (i=0; i < nsmp; ++i) {
+               if ((i & 7) == 0) inb = *in++;
+               *out++ = scale * (inb >> 7);
+               inb <<= 1;
             }
-            if (k > 0) *cur++ = scale * ((*in >> 7)       );
-            if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01);
-            if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01);
-            if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01);
-            if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01);
-            if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01);
-            if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01);
          }
-         if (img_n != out_n) {
-            int q;
-            // insert alpha = 255
-            cur = a->out + stride*j;
+
+         // insert alpha=255 values if desired
+         if (img_n != out_n)
+            stbi__create_png_alpha_expand8(dest, dest, x, img_n);
+      } else if (depth == 8) {
+         if (img_n == out_n)
+            memcpy(dest, cur, x*img_n);
+         else
+            stbi__create_png_alpha_expand8(dest, cur, x, img_n);
+      } else if (depth == 16) {
+         // convert the image data from big-endian to platform-native
+         stbi__uint16 *dest16 = (stbi__uint16*)dest;
+         stbi__uint32 nsmp = x*img_n;
+
+         if (img_n == out_n) {
+            for (i = 0; i < nsmp; ++i, ++dest16, cur += 2)
+               *dest16 = (cur[0] << 8) | cur[1];
+         } else {
+            STBI_ASSERT(img_n+1 == out_n);
             if (img_n == 1) {
-               for (q=x-1; q >= 0; --q) {
-                  cur[q*2+1] = 255;
-                  cur[q*2+0] = cur[q];
+               for (i = 0; i < x; ++i, dest16 += 2, cur += 2) {
+                  dest16[0] = (cur[0] << 8) | cur[1];
+                  dest16[1] = 0xffff;
                }
             } else {
                STBI_ASSERT(img_n == 3);
-               for (q=x-1; q >= 0; --q) {
-                  cur[q*4+3] = 255;
-                  cur[q*4+2] = cur[q*3+2];
-                  cur[q*4+1] = cur[q*3+1];
-                  cur[q*4+0] = cur[q*3+0];
+               for (i = 0; i < x; ++i, dest16 += 4, cur += 6) {
+                  dest16[0] = (cur[0] << 8) | cur[1];
+                  dest16[1] = (cur[2] << 8) | cur[3];
+                  dest16[2] = (cur[4] << 8) | cur[5];
+                  dest16[3] = 0xffff;
                }
             }
          }
       }
    }
-   } else if (depth == 16) {
-      // force the image data from big-endian to platform-native.
-      // this is done in a separate pass due to the decoding relying
-      // on the data being untouched, but could probably be done
-      // per-line during decode if care is taken.
-      stbi_uc *cur = a->out;
-      stbi__uint16 *cur16 = (stbi__uint16*)cur;
-
-      for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) {
-         *cur16 = (cur[0] << 8) | cur[1];
-      }
-   }
+
+   STBI_FREE(filter_buf);
+   if (!all_ok) return 0;
+
    return 1;
 }
@@ -5161,9 +5160,11 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
             // non-paletted with tRNS = constant alpha. if header-scanning, we can stop now.
             if (scan == STBI__SCAN_header) { ++s->img_n; return 1; }
             if (z->depth == 16) {
-               for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is
+               for (k = 0; k < s->img_n && k < 3; ++k) // extra loop test to suppress false GCC warning
+                  tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is
             } else {
-               for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger
+               for (k = 0; k < s->img_n && k < 3; ++k)
+                  tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger
            }
          }
         break;
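The rewritten stbi__paeth in the hunk above relies on the claim that the threshold formulation matches the PNG specification's reference predictor exactly, including tie-breaking. A minimal standalone check, not part of the patch and with illustrative names paeth_ref and paeth_new, can confirm the equivalence by brute force over all byte inputs:

/* Brute-force equivalence check for the two Paeth formulations.
   paeth_ref follows the PNG spec; paeth_new mirrors the patched stbi__paeth. */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

static int paeth_ref(int a, int b, int c) {
   int p  = a + b - c;                              /* initial estimate per the PNG spec */
   int pa = abs(p - a), pb = abs(p - b), pc = abs(p - c);
   if (pa <= pb && pa <= pc) return a;
   if (pb <= pc) return b;
   return c;
}

static int paeth_new(int a, int b, int c) {
   int thresh = c*3 - (a + b);                      /* same arithmetic as the patched stbi__paeth */
   int lo = a < b ? a : b;
   int hi = a < b ? b : a;
   int t0 = (hi <= thresh) ? lo : c;
   int t1 = (thresh <= lo) ? hi : t0;
   return t1;
}

int main(void) {
   int a, b, c;
   for (a = 0; a < 256; ++a)
      for (b = 0; b < 256; ++b)
         for (c = 0; c < 256; ++c)
            assert(paeth_ref(a, b, c) == paeth_new(a, b, c));
   printf("both Paeth formulations agree on all 256^3 byte inputs\n");
   return 0;
}

Because each comparison in the new form depends only on values that are available early, compilers can lower the two ternaries to conditional selects instead of branches, which is what makes the per-byte Paeth filtering in stbi__create_png_image_raw cheaper than the branchy reference version.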