diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml index ad386f42c91..18aa31db638 100644 --- a/.github/workflows/ccpp.yml +++ b/.github/workflows/ccpp.yml @@ -87,7 +87,7 @@ jobs: - name: Update apt run: sudo apt update - name: Install dependencies - run: sudo apt-get install -y --no-install-recommends yasm nasm gperf libgles2-mesa-dev libx11-dev libxft-dev libxext-dev libxrandr-dev libxi-dev libxcursor-dev libxdamage-dev libxinerama-dev libdbus-1-dev libxtst-dev + run: sudo apt-get install -y --no-install-recommends yasm nasm gperf automake autoconf libtool pkg-config autoconf-archive libgles2-mesa-dev libx11-dev libxft-dev libxext-dev libxrandr-dev libxi-dev libxcursor-dev libxdamage-dev libxinerama-dev libdbus-1-dev libxtst-dev - name: Clean downloads run: sudo apt-get clean @@ -147,7 +147,7 @@ jobs: path: ${{ github.workspace }}/uselib* - ubuntu-vcpkg-opencv3-cuda: + ubuntu-vcpkg-opencv3: runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v3 @@ -157,20 +157,10 @@ jobs: - name: Update apt run: sudo apt update - name: Install dependencies - run: sudo apt-get install -y --no-install-recommends yasm nasm gperf libgles2-mesa-dev libx11-dev libxft-dev libxext-dev libxrandr-dev libxi-dev libxcursor-dev libxdamage-dev libxinerama-dev libdbus-1-dev libxtst-dev + run: sudo apt-get install -y --no-install-recommends yasm nasm gperf automake autoconf libtool pkg-config autoconf-archive libgles2-mesa-dev libx11-dev libxft-dev libxext-dev libxrandr-dev libxi-dev libxcursor-dev libxdamage-dev libxinerama-dev libdbus-1-dev libxtst-dev - name: Clean downloads run: sudo apt-get clean - - name: 'Install CUDA' - run: ${{ github.workspace }}/scripts/deploy-cuda.sh - - - name: 'Create softlinks for CUDA' - run: | - source ${{ github.workspace }}/scripts/requested_cuda_version.sh - sudo ln -s /usr/local/cuda-${CUDA_VERSION}/lib64/stubs/libcuda.so /usr/local/cuda-${CUDA_VERSION}/lib64/stubs/libcuda.so.1 - sudo ln -s 
/usr/local/cuda-${CUDA_VERSION}/lib64/stubs/libcuda.so /usr/local/cuda-${CUDA_VERSION}/lib64/libcuda.so.1 - sudo ln -s /usr/local/cuda-${CUDA_VERSION}/lib64/stubs/libcuda.so /usr/local/cuda-${CUDA_VERSION}/lib64/libcuda.so - - name: 'Setup vcpkg and NuGet artifacts backend' shell: bash run: > @@ -192,15 +182,10 @@ jobs: - name: 'Build' shell: pwsh - env: - CUDACXX: "/usr/local/cuda/bin/nvcc" - CUDA_PATH: "/usr/local/cuda" - CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda" - LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" - run: ${{ github.workspace }}/build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN -ForceOpenCVVersion 3 -DisableInteractive -DoNotUpdateTOOL + run: ${{ github.workspace }}/build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -ForceOpenCVVersion 3 -DisableInteractive -DoNotUpdateTOOL - ubuntu-vcpkg-opencv2-cuda: + ubuntu-vcpkg-opencv2: runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v3 @@ -210,20 +195,10 @@ jobs: - name: Update apt run: sudo apt update - name: Install dependencies - run: sudo apt-get install -y --no-install-recommends yasm nasm gperf libgles2-mesa-dev libx11-dev libxft-dev libxext-dev libxrandr-dev libxi-dev libxcursor-dev libxdamage-dev libxinerama-dev libdbus-1-dev libxtst-dev + run: sudo apt-get install -y --no-install-recommends yasm nasm gperf automake autoconf libtool pkg-config autoconf-archive libgles2-mesa-dev libx11-dev libxft-dev libxext-dev libxrandr-dev libxi-dev libxcursor-dev libxdamage-dev libxinerama-dev libdbus-1-dev libxtst-dev - name: Clean downloads run: sudo apt-get clean - - name: 'Install CUDA' - run: ${{ github.workspace }}/scripts/deploy-cuda.sh - - - name: 'Create softlinks for CUDA' - run: | - source ${{ github.workspace }}/scripts/requested_cuda_version.sh - sudo ln -s /usr/local/cuda-${CUDA_VERSION}/lib64/stubs/libcuda.so /usr/local/cuda-${CUDA_VERSION}/lib64/stubs/libcuda.so.1 - sudo ln -s 
/usr/local/cuda-${CUDA_VERSION}/lib64/stubs/libcuda.so /usr/local/cuda-${CUDA_VERSION}/lib64/libcuda.so.1 - sudo ln -s /usr/local/cuda-${CUDA_VERSION}/lib64/stubs/libcuda.so /usr/local/cuda-${CUDA_VERSION}/lib64/libcuda.so - - name: 'Setup vcpkg and NuGet artifacts backend' shell: bash run: > @@ -245,12 +220,7 @@ jobs: - name: 'Build' shell: pwsh - env: - CUDACXX: "/usr/local/cuda/bin/nvcc" - CUDA_PATH: "/usr/local/cuda" - CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda" - LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" - run: ${{ github.workspace }}/build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN -ForceOpenCVVersion 2 -DisableInteractive -DoNotUpdateTOOL + run: ${{ github.workspace }}/build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -ForceOpenCVVersion 2 -DisableInteractive -DoNotUpdateTOOL ubuntu: @@ -398,7 +368,7 @@ jobs: if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.debug_enabled }} - name: Install dependencies - run: brew install libomp yasm nasm pkg-config + run: brew install libomp yasm nasm pkg-config automake autoconf-archive - uses: lukka/get-cmake@latest @@ -693,9 +663,9 @@ jobs: - name: 'Build' env: - CUDA_PATH: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.2" - CUDA_TOOLKIT_ROOT_DIR: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.2" - CUDACXX: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.2\\bin\\nvcc.exe" + CUDA_PATH: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.6" + CUDA_TOOLKIT_ROOT_DIR: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.6" + CUDACXX: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.6\\bin\\nvcc.exe" shell: pwsh run: ${{ github.workspace }}/build.ps1 -EnableCUDA -DisableInteractive -DoNotUpdateTOOL diff --git a/.github/workflows/on_pr.yml b/.github/workflows/on_pr.yml index 6e7d36b807c..1ad990ba91a 100644 --- a/.github/workflows/on_pr.yml +++ b/.github/workflows/on_pr.yml 
@@ -73,7 +73,7 @@ jobs: - name: Update apt run: sudo apt update - name: Install dependencies - run: sudo apt-get install -y --no-install-recommends yasm nasm gperf libgles2-mesa-dev libx11-dev libxft-dev libxext-dev libxrandr-dev libxi-dev libxcursor-dev libxdamage-dev libxinerama-dev libdbus-1-dev libxtst-dev + run: sudo apt-get install -y --no-install-recommends yasm nasm gperf automake autoconf libtool pkg-config autoconf-archive libgles2-mesa-dev libx11-dev libxft-dev libxext-dev libxrandr-dev libxi-dev libxcursor-dev libxdamage-dev libxinerama-dev libdbus-1-dev libxtst-dev - name: Clean downloads run: sudo apt-get clean @@ -106,7 +106,7 @@ jobs: run: ${{ github.workspace }}/build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN -DisableInteractive -DoNotUpdateTOOL - ubuntu-vcpkg-opencv3-cuda: + ubuntu-vcpkg-opencv3: runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v3 @@ -116,20 +116,10 @@ jobs: - name: Update apt run: sudo apt update - name: Install dependencies - run: sudo apt-get install -y --no-install-recommends yasm nasm gperf libgles2-mesa-dev libx11-dev libxft-dev libxext-dev libxrandr-dev libxi-dev libxcursor-dev libxdamage-dev libxinerama-dev libdbus-1-dev libxtst-dev + run: sudo apt-get install -y --no-install-recommends yasm nasm gperf automake autoconf libtool pkg-config autoconf-archive libgles2-mesa-dev libx11-dev libxft-dev libxext-dev libxrandr-dev libxi-dev libxcursor-dev libxdamage-dev libxinerama-dev libdbus-1-dev libxtst-dev - name: Clean downloads run: sudo apt-get clean - - name: 'Install CUDA' - run: ${{ github.workspace }}/scripts/deploy-cuda.sh - - - name: 'Create softlinks for CUDA' - run: | - source ${{ github.workspace }}/scripts/requested_cuda_version.sh - sudo ln -s /usr/local/cuda-${CUDA_VERSION}/lib64/stubs/libcuda.so /usr/local/cuda-${CUDA_VERSION}/lib64/stubs/libcuda.so.1 - sudo ln -s /usr/local/cuda-${CUDA_VERSION}/lib64/stubs/libcuda.so /usr/local/cuda-${CUDA_VERSION}/lib64/libcuda.so.1 - sudo ln 
-s /usr/local/cuda-${CUDA_VERSION}/lib64/stubs/libcuda.so /usr/local/cuda-${CUDA_VERSION}/lib64/libcuda.so - - name: 'Setup vcpkg and NuGet artifacts backend' shell: bash run: > @@ -141,15 +131,10 @@ jobs: - name: 'Build' shell: pwsh - env: - CUDACXX: "/usr/local/cuda/bin/nvcc" - CUDA_PATH: "/usr/local/cuda" - CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda" - LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" - run: ${{ github.workspace }}/build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN -ForceOpenCVVersion 3 -DisableInteractive -DoNotUpdateTOOL + run: ${{ github.workspace }}/build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -ForceOpenCVVersion 3 -DisableInteractive -DoNotUpdateTOOL - ubuntu-vcpkg-opencv2-cuda: + ubuntu-vcpkg-opencv2: runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v3 @@ -159,20 +144,10 @@ jobs: - name: Update apt run: sudo apt update - name: Install dependencies - run: sudo apt-get install -y --no-install-recommends yasm nasm gperf libgles2-mesa-dev libx11-dev libxft-dev libxext-dev libxrandr-dev libxi-dev libxcursor-dev libxdamage-dev libxinerama-dev libdbus-1-dev libxtst-dev + run: sudo apt-get install -y --no-install-recommends yasm nasm gperf automake autoconf libtool pkg-config autoconf-archive libgles2-mesa-dev libx11-dev libxft-dev libxext-dev libxrandr-dev libxi-dev libxcursor-dev libxdamage-dev libxinerama-dev libdbus-1-dev libxtst-dev - name: Clean downloads run: sudo apt-get clean - - name: 'Install CUDA' - run: ${{ github.workspace }}/scripts/deploy-cuda.sh - - - name: 'Create softlinks for CUDA' - run: | - source ${{ github.workspace }}/scripts/requested_cuda_version.sh - sudo ln -s /usr/local/cuda-${CUDA_VERSION}/lib64/stubs/libcuda.so /usr/local/cuda-${CUDA_VERSION}/lib64/stubs/libcuda.so.1 - sudo ln -s /usr/local/cuda-${CUDA_VERSION}/lib64/stubs/libcuda.so /usr/local/cuda-${CUDA_VERSION}/lib64/libcuda.so.1 - sudo ln -s 
/usr/local/cuda-${CUDA_VERSION}/lib64/stubs/libcuda.so /usr/local/cuda-${CUDA_VERSION}/lib64/libcuda.so - - name: 'Setup vcpkg and NuGet artifacts backend' shell: bash run: > @@ -184,12 +159,7 @@ jobs: - name: 'Build' shell: pwsh - env: - CUDACXX: "/usr/local/cuda/bin/nvcc" - CUDA_PATH: "/usr/local/cuda" - CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda" - LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" - run: ${{ github.workspace }}/build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN -ForceOpenCVVersion 2 -DisableInteractive -DoNotUpdateTOOL + run: ${{ github.workspace }}/build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -ForceOpenCVVersion 2 -DisableInteractive -DoNotUpdateTOOL ubuntu: @@ -289,7 +259,7 @@ jobs: - uses: actions/checkout@v3 - name: Install dependencies - run: brew install libomp yasm nasm pkg-config + run: brew install libomp yasm nasm pkg-config automake autoconf-archive - uses: lukka/get-cmake@latest @@ -435,9 +405,9 @@ jobs: - name: 'Build' env: - CUDA_PATH: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.2" - CUDA_TOOLKIT_ROOT_DIR: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.2" - CUDACXX: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.2\\bin\\nvcc.exe" + CUDA_PATH: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.6" + CUDA_TOOLKIT_ROOT_DIR: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.6" + CUDACXX: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.6\\bin\\nvcc.exe" shell: pwsh run: ${{ github.workspace }}/build.ps1 -EnableCUDA -DisableInteractive -DoNotUpdateTOOL diff --git a/.gitignore b/.gitignore index bb62a6060b2..3166d82c7b3 100644 --- a/.gitignore +++ b/.gitignore @@ -41,6 +41,8 @@ build/detect_cuda_compute_capabilities.cu build/.ninja_deps build/.ninja_log build/Makefile +CMakeFiles/ +CMakeCache.txt */vcpkg-manifest-install.log build.log __pycache__/ diff --git a/3rdparty/stb/include/stb_image.h 
b/3rdparty/stb/include/stb_image.h index 5e807a0a6e7..9eedabedc45 100644 --- a/3rdparty/stb/include/stb_image.h +++ b/3rdparty/stb/include/stb_image.h @@ -1,4 +1,4 @@ -/* stb_image - v2.28 - public domain image loader - http://nothings.org/stb +/* stb_image - v2.30 - public domain image loader - http://nothings.org/stb no warranty implied; use at your own risk Do this: @@ -48,6 +48,8 @@ LICENSE RECENT REVISION HISTORY: + 2.30 (2024-05-31) avoid erroneous gcc warning + 2.29 (2023-05-xx) optimizations 2.28 (2023-01-29) many error fixes, security errors, just tons of stuff 2.27 (2021-07-11) document stbi_info better, 16-bit PNM support, bug fixes 2.26 (2020-07-13) many minor fixes @@ -1072,8 +1074,8 @@ static int stbi__addints_valid(int a, int b) return a <= INT_MAX - b; } -// returns 1 if the product of two signed shorts is valid, 0 on overflow. -static int stbi__mul2shorts_valid(short a, short b) +// returns 1 if the product of two ints fits in a signed short, 0 on overflow. +static int stbi__mul2shorts_valid(int a, int b) { if (b == 0 || b == -1) return 1; // multiplication by 0 is always 0; check for -1 so SHRT_MIN/b doesn't overflow if ((a >= 0) == (b >= 0)) return a <= SHRT_MAX/b; // product is positive, so similar to mul2sizes_valid @@ -3384,13 +3386,13 @@ static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan) return 1; } -static int stbi__skip_jpeg_junk_at_end(stbi__jpeg *j) +static stbi_uc stbi__skip_jpeg_junk_at_end(stbi__jpeg *j) { // some JPEGs have junk at end, skip over it but if we find what looks // like a valid marker, resume there while (!stbi__at_eof(j->s)) { - int x = stbi__get8(j->s); - while (x == 255) { // might be a marker + stbi_uc x = stbi__get8(j->s); + while (x == 0xff) { // might be a marker if (stbi__at_eof(j->s)) return STBI__MARKER_none; x = stbi__get8(j->s); if (x != 0x00 && x != 0xff) { @@ -4176,6 +4178,7 @@ typedef struct { stbi_uc *zbuffer, *zbuffer_end; int num_bits; + int hit_zeof_once; stbi__uint32 code_buffer; char *zout; 
@@ -4242,9 +4245,20 @@ stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z) int b,s; if (a->num_bits < 16) { if (stbi__zeof(a)) { - return -1; /* report error for unexpected end of data. */ + if (!a->hit_zeof_once) { + // This is the first time we hit eof, insert 16 extra padding bits + // to allow us to keep going; if we actually consume any of them + // though, that is invalid data. This is caught later. + a->hit_zeof_once = 1; + a->num_bits += 16; // add 16 implicit zero bits + } else { + // We already inserted our extra 16 padding bits and are again + // out, this stream is actually prematurely terminated. + return -1; + } + } else { + stbi__fill_bits(a); } - stbi__fill_bits(a); } b = z->fast[a->code_buffer & STBI__ZFAST_MASK]; if (b) { @@ -4309,6 +4323,13 @@ static int stbi__parse_huffman_block(stbi__zbuf *a) int len,dist; if (z == 256) { a->zout = zout; + if (a->hit_zeof_once && a->num_bits < 16) { + // The first time we hit zeof, we inserted 16 extra zero bits into our bit + // buffer so the decoder can just do its speculative decoding. But if we + // actually consumed any of those bits (which is the case when num_bits < 16), + // the stream actually read past the end so it is malformed. 
+ return stbi__err("unexpected end","Corrupt PNG"); + } return 1; } if (z >= 286) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, length codes 286 and 287 must not appear in compressed data @@ -4320,7 +4341,7 @@ static int stbi__parse_huffman_block(stbi__zbuf *a) dist = stbi__zdist_base[z]; if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]); if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG"); - if (zout + len > a->zout_end) { + if (len > a->zout_end - zout) { if (!stbi__zexpand(a, zout, len)) return 0; zout = a->zout; } @@ -4464,6 +4485,7 @@ static int stbi__parse_zlib(stbi__zbuf *a, int parse_header) if (!stbi__parse_zlib_header(a)) return 0; a->num_bits = 0; a->code_buffer = 0; + a->hit_zeof_once = 0; do { final = stbi__zreceive(a,1); type = stbi__zreceive(a,2); @@ -4619,9 +4641,8 @@ enum { STBI__F_up=2, STBI__F_avg=3, STBI__F_paeth=4, - // synthetic filters used for first scanline to avoid needing a dummy row of 0s - STBI__F_avg_first, - STBI__F_paeth_first + // synthetic filter used for first scanline to avoid needing a dummy row of 0s + STBI__F_avg_first }; static stbi_uc first_row_filter[5] = @@ -4630,29 +4651,56 @@ static stbi_uc first_row_filter[5] = STBI__F_sub, STBI__F_none, STBI__F_avg_first, - STBI__F_paeth_first + STBI__F_sub // Paeth with b=c=0 turns out to be equivalent to sub }; static int stbi__paeth(int a, int b, int c) { - int p = a + b - c; - int pa = abs(p-a); - int pb = abs(p-b); - int pc = abs(p-c); - if (pa <= pb && pa <= pc) return a; - if (pb <= pc) return b; - return c; + // This formulation looks very different from the reference in the PNG spec, but is + // actually equivalent and has favorable data dependencies and admits straightforward + // generation of branch-free code, which helps performance significantly. + int thresh = c*3 - (a + b); + int lo = a < b ? a : b; + int hi = a < b ? b : a; + int t0 = (hi <= thresh) ? lo : c; + int t1 = (thresh <= lo) ? 
hi : t0; + return t1; } static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 }; +// adds an extra all-255 alpha channel +// dest == src is legal +// img_n must be 1 or 3 +static void stbi__create_png_alpha_expand8(stbi_uc *dest, stbi_uc *src, stbi__uint32 x, int img_n) +{ + int i; + // must process data backwards since we allow dest==src + if (img_n == 1) { + for (i=x-1; i >= 0; --i) { + dest[i*2+1] = 255; + dest[i*2+0] = src[i]; + } + } else { + STBI_ASSERT(img_n == 3); + for (i=x-1; i >= 0; --i) { + dest[i*4+3] = 255; + dest[i*4+2] = src[i*3+2]; + dest[i*4+1] = src[i*3+1]; + dest[i*4+0] = src[i*3+0]; + } + } +} + // create the png data from post-deflated data static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color) { - int bytes = (depth == 16? 2 : 1); + int bytes = (depth == 16 ? 2 : 1); stbi__context *s = a->s; stbi__uint32 i,j,stride = x*out_n*bytes; stbi__uint32 img_len, img_width_bytes; + stbi_uc *filter_buf; + int all_ok = 1; int k; int img_n = s->img_n; // copy it into a local for later @@ -4664,8 +4712,11 @@ static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 r a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into if (!a->out) return stbi__err("outofmem", "Out of memory"); + // note: error exits here don't need to clean up a->out individually, + // stbi__do_png always does on error. 
if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG"); img_width_bytes = (((img_n * x * depth) + 7) >> 3); + if (!stbi__mad2sizes_valid(img_width_bytes, y, img_width_bytes)) return stbi__err("too large", "Corrupt PNG"); img_len = (img_width_bytes + 1) * y; // we used to check for exact match between raw_len and img_len on non-interlaced PNGs, @@ -4673,189 +4724,137 @@ static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 r // so just check for raw_len < img_len always. if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG"); + // Allocate two scan lines worth of filter workspace buffer. + filter_buf = (stbi_uc *) stbi__malloc_mad2(img_width_bytes, 2, 0); + if (!filter_buf) return stbi__err("outofmem", "Out of memory"); + + // Filtering for low-bit-depth images + if (depth < 8) { + filter_bytes = 1; + width = img_width_bytes; + } + for (j=0; j < y; ++j) { - stbi_uc *cur = a->out + stride*j; - stbi_uc *prior; + // cur/prior filter buffers alternate + stbi_uc *cur = filter_buf + (j & 1)*img_width_bytes; + stbi_uc *prior = filter_buf + (~j & 1)*img_width_bytes; + stbi_uc *dest = a->out + stride*j; + int nk = width * filter_bytes; int filter = *raw++; - if (filter > 4) - return stbi__err("invalid filter","Corrupt PNG"); - - if (depth < 8) { - if (img_width_bytes > x) return stbi__err("invalid width","Corrupt PNG"); - cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place - filter_bytes = 1; - width = img_width_bytes; + // check filter type + if (filter > 4) { + all_ok = stbi__err("invalid filter","Corrupt PNG"); + break; } - prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above // if first row, use special filter that doesn't sample previous row if (j == 0) filter = first_row_filter[filter]; - // handle first byte explicitly - for (k=0; k < filter_bytes; ++k) { - switch (filter) { - case STBI__F_none : 
cur[k] = raw[k]; break; - case STBI__F_sub : cur[k] = raw[k]; break; - case STBI__F_up : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break; - case STBI__F_avg : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break; - case STBI__F_paeth : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break; - case STBI__F_avg_first : cur[k] = raw[k]; break; - case STBI__F_paeth_first: cur[k] = raw[k]; break; - } - } - - if (depth == 8) { - if (img_n != out_n) - cur[img_n] = 255; // first pixel - raw += img_n; - cur += out_n; - prior += out_n; - } else if (depth == 16) { - if (img_n != out_n) { - cur[filter_bytes] = 255; // first pixel top byte - cur[filter_bytes+1] = 255; // first pixel bottom byte - } - raw += filter_bytes; - cur += output_bytes; - prior += output_bytes; - } else { - raw += 1; - cur += 1; - prior += 1; + // perform actual filtering + switch (filter) { + case STBI__F_none: + memcpy(cur, raw, nk); + break; + case STBI__F_sub: + memcpy(cur, raw, filter_bytes); + for (k = filter_bytes; k < nk; ++k) + cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); + break; + case STBI__F_up: + for (k = 0; k < nk; ++k) + cur[k] = STBI__BYTECAST(raw[k] + prior[k]); + break; + case STBI__F_avg: + for (k = 0; k < filter_bytes; ++k) + cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); + for (k = filter_bytes; k < nk; ++k) + cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); + break; + case STBI__F_paeth: + for (k = 0; k < filter_bytes; ++k) + cur[k] = STBI__BYTECAST(raw[k] + prior[k]); // prior[k] == stbi__paeth(0,prior[k],0) + for (k = filter_bytes; k < nk; ++k) + cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes], prior[k], prior[k-filter_bytes])); + break; + case STBI__F_avg_first: + memcpy(cur, raw, filter_bytes); + for (k = filter_bytes; k < nk; ++k) + cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); + break; } - // this is a little gross, so that we don't switch per-pixel or per-component - if (depth < 8 || img_n 
== out_n) { - int nk = (width - 1)*filter_bytes; - #define STBI__CASE(f) \ - case f: \ - for (k=0; k < nk; ++k) - switch (filter) { - // "none" filter turns into a memcpy here; make that explicit. - case STBI__F_none: memcpy(cur, raw, nk); break; - STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break; - STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break; - STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break; - STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break; - STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break; - STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break; - } - #undef STBI__CASE - raw += nk; - } else { - STBI_ASSERT(img_n+1 == out_n); - #define STBI__CASE(f) \ - case f: \ - for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \ - for (k=0; k < filter_bytes; ++k) - switch (filter) { - STBI__CASE(STBI__F_none) { cur[k] = raw[k]; } break; - STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break; - STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break; - STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break; - STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break; - STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break; - STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break; - } - #undef STBI__CASE - - // the loop above sets the high byte of the pixels' alpha, but for - // 16 bit png files we also 
need the low byte set. we'll do that here. - if (depth == 16) { - cur = a->out + stride*j; // start at the beginning of the row again - for (i=0; i < x; ++i,cur+=output_bytes) { - cur[filter_bytes+1] = 255; - } - } - } - } + raw += nk; - // we make a separate pass to expand bits to pixels; for performance, - // this could run two scanlines behind the above code, so it won't - // intefere with filtering but will still be in the cache. - if (depth < 8) { - for (j=0; j < y; ++j) { - stbi_uc *cur = a->out + stride*j; - stbi_uc *in = a->out + stride*j + x*out_n - img_width_bytes; - // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit - // png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop + // expand decoded bits in cur to dest, also adding an extra alpha channel if desired + if (depth < 8) { stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range + stbi_uc *in = cur; + stbi_uc *out = dest; + stbi_uc inb = 0; + stbi__uint32 nsmp = x*img_n; - // note that the final byte might overshoot and write more data than desired. - // we can allocate enough data that this never writes out of memory, but it - // could also overwrite the next scanline. can it overwrite non-empty data - // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel. 
- // so we need to explicitly clamp the final ones - + // expand bits to bytes first if (depth == 4) { - for (k=x*img_n; k >= 2; k-=2, ++in) { - *cur++ = scale * ((*in >> 4) ); - *cur++ = scale * ((*in ) & 0x0f); + for (i=0; i < nsmp; ++i) { + if ((i & 1) == 0) inb = *in++; + *out++ = scale * (inb >> 4); + inb <<= 4; } - if (k > 0) *cur++ = scale * ((*in >> 4) ); } else if (depth == 2) { - for (k=x*img_n; k >= 4; k-=4, ++in) { - *cur++ = scale * ((*in >> 6) ); - *cur++ = scale * ((*in >> 4) & 0x03); - *cur++ = scale * ((*in >> 2) & 0x03); - *cur++ = scale * ((*in ) & 0x03); + for (i=0; i < nsmp; ++i) { + if ((i & 3) == 0) inb = *in++; + *out++ = scale * (inb >> 6); + inb <<= 2; } - if (k > 0) *cur++ = scale * ((*in >> 6) ); - if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03); - if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03); - } else if (depth == 1) { - for (k=x*img_n; k >= 8; k-=8, ++in) { - *cur++ = scale * ((*in >> 7) ); - *cur++ = scale * ((*in >> 6) & 0x01); - *cur++ = scale * ((*in >> 5) & 0x01); - *cur++ = scale * ((*in >> 4) & 0x01); - *cur++ = scale * ((*in >> 3) & 0x01); - *cur++ = scale * ((*in >> 2) & 0x01); - *cur++ = scale * ((*in >> 1) & 0x01); - *cur++ = scale * ((*in ) & 0x01); + } else { + STBI_ASSERT(depth == 1); + for (i=0; i < nsmp; ++i) { + if ((i & 7) == 0) inb = *in++; + *out++ = scale * (inb >> 7); + inb <<= 1; } - if (k > 0) *cur++ = scale * ((*in >> 7) ); - if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01); - if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01); - if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01); - if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01); - if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01); - if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01); } - if (img_n != out_n) { - int q; - // insert alpha = 255 - cur = a->out + stride*j; + + // insert alpha=255 values if desired + if (img_n != out_n) + stbi__create_png_alpha_expand8(dest, dest, x, img_n); + } else if (depth == 8) { + if (img_n == out_n) + memcpy(dest, cur, x*img_n); + 
else + stbi__create_png_alpha_expand8(dest, cur, x, img_n); + } else if (depth == 16) { + // convert the image data from big-endian to platform-native + stbi__uint16 *dest16 = (stbi__uint16*)dest; + stbi__uint32 nsmp = x*img_n; + + if (img_n == out_n) { + for (i = 0; i < nsmp; ++i, ++dest16, cur += 2) + *dest16 = (cur[0] << 8) | cur[1]; + } else { + STBI_ASSERT(img_n+1 == out_n); if (img_n == 1) { - for (q=x-1; q >= 0; --q) { - cur[q*2+1] = 255; - cur[q*2+0] = cur[q]; + for (i = 0; i < x; ++i, dest16 += 2, cur += 2) { + dest16[0] = (cur[0] << 8) | cur[1]; + dest16[1] = 0xffff; } } else { STBI_ASSERT(img_n == 3); - for (q=x-1; q >= 0; --q) { - cur[q*4+3] = 255; - cur[q*4+2] = cur[q*3+2]; - cur[q*4+1] = cur[q*3+1]; - cur[q*4+0] = cur[q*3+0]; + for (i = 0; i < x; ++i, dest16 += 4, cur += 6) { + dest16[0] = (cur[0] << 8) | cur[1]; + dest16[1] = (cur[2] << 8) | cur[3]; + dest16[2] = (cur[4] << 8) | cur[5]; + dest16[3] = 0xffff; } } } } - } else if (depth == 16) { - // force the image data from big-endian to platform-native. - // this is done in a separate pass due to the decoding relying - // on the data being untouched, but could probably be done - // per-line during decode if care is taken. - stbi_uc *cur = a->out; - stbi__uint16 *cur16 = (stbi__uint16*)cur; - - for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) { - *cur16 = (cur[0] << 8) | cur[1]; - } } + STBI_FREE(filter_buf); + if (!all_ok) return 0; + return 1; } @@ -5161,9 +5160,11 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) // non-paletted with tRNS = constant alpha. if header-scanning, we can stop now. 
if (scan == STBI__SCAN_header) { ++s->img_n; return 1; } if (z->depth == 16) { - for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is + for (k = 0; k < s->img_n && k < 3; ++k) // extra loop test to suppress false GCC warning + tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is } else { - for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger + for (k = 0; k < s->img_n && k < 3; ++k) + tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger } } break; diff --git a/CMakeLists.txt b/CMakeLists.txt index 4710e39def8..8c65eeff301 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,6 +22,7 @@ option(VCPKG_USE_OPENCV2 "Use legacy OpenCV 2" OFF) option(VCPKG_USE_OPENCV3 "Use legacy OpenCV 3" OFF) option(VCPKG_USE_OPENCV4 "Use OpenCV 4" ON) option(USE_NSIS "Use NSIS as a CPack backend on Windows" ON) +option(SKIP_INSTALL_RUNTIME_LIBS "Do not install runtime libs" OFF) if(DEFINED ENV{VCPKG_DEFAULT_TRIPLET}) message(STATUS "Setting default vcpkg target triplet to $ENV{VCPKG_DEFAULT_TRIPLET}") @@ -52,26 +53,22 @@ if(ENABLE_OPENCV) if(VCPKG_USE_OPENCV4) list(APPEND VCPKG_MANIFEST_FEATURES "opencv-cuda") elseif(VCPKG_USE_OPENCV3) - list(APPEND VCPKG_MANIFEST_FEATURES "opencv3-cuda") + list(APPEND VCPKG_MANIFEST_FEATURES "opencv3") elseif(VCPKG_USE_OPENCV2) - list(APPEND VCPKG_MANIFEST_FEATURES "opencv2-cuda") + list(APPEND VCPKG_MANIFEST_FEATURES "opencv2") endif() else() if(VCPKG_USE_OPENCV4) list(APPEND VCPKG_MANIFEST_FEATURES "opencv-base") elseif(VCPKG_USE_OPENCV3) - list(APPEND VCPKG_MANIFEST_FEATURES "opencv3-base") + list(APPEND VCPKG_MANIFEST_FEATURES "opencv3") elseif(VCPKG_USE_OPENCV2) - list(APPEND VCPKG_MANIFEST_FEATURES "opencv2-base") + list(APPEND VCPKG_MANIFEST_FEATURES "opencv2") endif() endif() endif() -if(NOT CMAKE_HOST_SYSTEM_PROCESSOR AND NOT WIN32) 
- execute_process(COMMAND "uname" "-m" OUTPUT_VARIABLE CMAKE_HOST_SYSTEM_PROCESSOR OUTPUT_STRIP_TRAILING_WHITESPACE) -endif() - -if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "x86") +if(CMAKE_SYSTEM_PROCESSOR MATCHES "^x86" OR CMAKE_SYSTEM_PROCESSOR MATCHES "^AMD64") set(IS_X86 TRUE) else() set(IS_X86 FALSE) @@ -672,13 +669,14 @@ if(ENABLE_CSHARP_WRAPPER) add_subdirectory(src/csharp) endif() -set(CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_SKIP TRUE) -include(InstallRequiredSystemLibraries) - -install( +if (NOT SKIP_INSTALL_RUNTIME_LIBS) + set(CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_SKIP TRUE) + include(InstallRequiredSystemLibraries) + install( PROGRAMS ${CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS} DESTINATION ${INSTALL_BIN_DIR} -) + ) +endif() if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/vcpkg.json) file(READ ${CMAKE_CURRENT_SOURCE_DIR}/vcpkg.json VCPKG_JSON_STRING) diff --git a/Dockerfile.cpu b/Dockerfile.cpu index 7a15cfb499e..db433f29b25 100644 --- a/Dockerfile.cpu +++ b/Dockerfile.cpu @@ -14,7 +14,7 @@ RUN rm Dockerfile.cpu RUN rm Dockerfile.gpu -RUN rm Docker-compose.yml +RUN rm docker-compose.yml RUN make @@ -28,7 +28,7 @@ RUN apt-get install -y sudo libgomp1 RUN useradd -U -m yolo -RUN usermod -aG sudo yolo +RUN usermod -aG sudo yolo RUN usermod --shell /bin/bash yolo @@ -45,5 +45,3 @@ RUN ldconfig WORKDIR /home/yolo/darknet USER yolo - - diff --git a/Dockerfile.gpu b/Dockerfile.gpu index c4a9effbbcc..f1985fbcfc0 100644 --- a/Dockerfile.gpu +++ b/Dockerfile.gpu @@ -14,11 +14,11 @@ RUN rm Dockerfile.cpu RUN rm Dockerfile.gpu -RUN rm Docker-compose.yml +RUN rm docker-compose.yml RUN make -FROM nvidia/cuda:11.6.0-cudnn8-devel-ubuntu20.04 +FROM nvidia/cuda:11.6.1-cudnn8-devel-ubuntu20.04 ENV DEBIAN_FRONTEND noninteractive @@ -28,7 +28,7 @@ RUN apt-get install -y sudo libgomp1 RUN useradd -U -m yolo -RUN usermod -aG sudo yolo +RUN usermod -aG sudo yolo RUN usermod --shell /bin/bash yolo @@ -44,4 +44,4 @@ RUN ldconfig WORKDIR /home/yolo/darknet -USER yolo \ No newline at end of file +USER yolo diff 
--git a/build.ps1 b/build.ps1 index e378e4b4536..07ecf1f17a8 100755 --- a/build.ps1 +++ b/build.ps1 @@ -6,7 +6,7 @@ build Created By: Stefano Sinigardi Created Date: February 18, 2019 - Last Modified Date: September 25, 2023 + Last Modified Date: April 29, 2024 .DESCRIPTION Build tool using CMake, trying to properly setup the environment around compiler @@ -92,6 +92,12 @@ Force using a different buildtrees dir for vcpkg .PARAMETER ForceVCPKGPackagesRemoval Force clean up of vcpkg packages folder at the end of the script +.PARAMETER CloneVCPKGShallow +Clone vcpkg as shallow repository + +.PARAMETER ForceDisableVCPKGShallow +Force vcpkg clone to NOT be a shallow one + .PARAMETER ForceSetupVS Forces Visual Studio setup, also on systems on which it would not have been enabled automatically @@ -179,6 +185,8 @@ param ( [switch]$ForceVCPKGBuildtreesRemoval = $false, [string]$ForceVCPKGBuildtreesPath = "", [switch]$ForceVCPKGPackagesRemoval = $false, + [switch]$CloneVCPKGShallow = $false, + [switch]$ForceDisableVCPKGShallow = $false, [switch]$ForceSetupVS = $false, [switch]$ForceCMakeFromVS = $false, [switch]$ForceNinjaFromVS = $false, @@ -193,13 +201,33 @@ param ( $global:DisableInteractive = $DisableInteractive -$build_ps1_version = "3.6.1" +$build_ps1_version = "4.0.1" $script_name = $MyInvocation.MyCommand.Name $utils_psm1_avail = $false -if (Test-Path $PSScriptRoot/scripts/utils.psm1) { +if (Test-Path $PSScriptRoot/utils.psm1) { + Import-Module -Name $PSScriptRoot/utils.psm1 -Force + $utils_psm1_avail = $true +} +elseif (Test-Path $PSScriptRoot/cmake/utils.psm1) { + Import-Module -Name $PSScriptRoot/cmake/utils.psm1 -Force + $utils_psm1_avail = $true + $IsInGitSubmodule = $false +} +elseif (Test-Path $PSScriptRoot/ci/utils.psm1) { + Import-Module -Name $PSScriptRoot/ci/utils.psm1 -Force + $utils_psm1_avail = $true + $IsInGitSubmodule = $false +} +elseif (Test-Path $PSScriptRoot/ccm/utils.psm1) { + Import-Module -Name $PSScriptRoot/ccm/utils.psm1 -Force + 
$utils_psm1_avail = $true + $IsInGitSubmodule = $false +} +elseif (Test-Path $PSScriptRoot/scripts/utils.psm1) { Import-Module -Name $PSScriptRoot/scripts/utils.psm1 -Force $utils_psm1_avail = $true + $IsInGitSubmodule = $false } else { $utils_psm1_version = "unavail" @@ -306,6 +334,10 @@ if ($InstallDARKNETthroughVCPKG -and -not $EnableOPENCV) { if ($UseVCPKG) { Write-Host "vcpkg bootstrap script: bootstrap-vcpkg${bootstrap_ext}" + if(($ForceOpenCVVersion -eq 0) -and -Not $ForceDisableVCPKGShallow) { + Write-Host "vcpkg will be cloned in shallow mode since baseline is not needed" + $CloneVCPKGShallow = $true + } } if ((-Not $IsWindows) -and (-Not $IsWindowsPowerShell) -and (-Not $ForceSetupVS)) { @@ -323,6 +355,31 @@ if (($IsLinux -or $IsMacOS) -and ($ForceGCCVersion -gt 0)) { $env:CXX = "g++-$ForceGCCVersion" } +$osArchitecture = [System.Runtime.InteropServices.RuntimeInformation]::OSArchitecture +switch ($osArchitecture) { + "X86" { + $vcpkgArchitecture = "x86" + $vsArchitecture = "Win32" + } + "X64" { + $vcpkgArchitecture = "x64" + $vsArchitecture = "x64" + } + "Arm" { + $vcpkgArchitecture = "arm" + $vsArchitecture = "arm" + } + "Arm64" { + $vcpkgArchitecture = "arm64" + $vsArchitecture = "arm64" + } + default { + $vcpkgArchitecture = "x64" + $vsArchitecture = "x64" + Write-Output "Unknown architecture. Trying x64" + } +} + $vcpkg_triplet_set_by_this_script = $false $vcpkg_host_triplet_set_by_this_script = $false @@ -336,48 +393,48 @@ if (($IsWindows -or $IsWindowsPowerShell) -and (-Not $env:VCPKG_DEFAULT_TRIPLET) $DoNotUseNinja = $true Write-Host "Warning: when building for 32bit windows target, only msbuild can be used and ninja will be disabled. Doing that for you!" 
-ForegroundColor Yellow } - $env:VCPKG_DEFAULT_TRIPLET = "x86-windows" + $env:VCPKG_DEFAULT_TRIPLET = "${vcpkgArchitecture}-windows" $vcpkg_triplet_set_by_this_script = $true } else { if($BuildDebug) { - $env:VCPKG_DEFAULT_TRIPLET = "x64-windows" + $env:VCPKG_DEFAULT_TRIPLET = "${vcpkgArchitecture}-windows" $vcpkg_triplet_set_by_this_script = $true } else { - $env:VCPKG_DEFAULT_TRIPLET = "x64-windows-release" + $env:VCPKG_DEFAULT_TRIPLET = "${vcpkgArchitecture}-windows-release" $vcpkg_triplet_set_by_this_script = $true } } } if (($IsWindows -or $IsWindowsPowerShell) -and (-Not $env:VCPKG_DEFAULT_HOST_TRIPLET)) { if ($BuildDebug) { - $env:VCPKG_DEFAULT_HOST_TRIPLET = "x64-windows" + $env:VCPKG_DEFAULT_HOST_TRIPLET = "${vcpkgArchitecture}-windows" $vcpkg_host_triplet_set_by_this_script = $true } else { - $env:VCPKG_DEFAULT_HOST_TRIPLET = "x64-windows-release" + $env:VCPKG_DEFAULT_HOST_TRIPLET = "${vcpkgArchitecture}-windows-release" $vcpkg_host_triplet_set_by_this_script = $true } } if ($IsMacOS -and (-Not $env:VCPKG_DEFAULT_TRIPLET)) { if ($BuildDebug) { - $env:VCPKG_DEFAULT_TRIPLET = "x64-osx" + $env:VCPKG_DEFAULT_TRIPLET = "${vcpkgArchitecture}-osx" $vcpkg_triplet_set_by_this_script = $true } else { - $env:VCPKG_DEFAULT_TRIPLET = "x64-osx-release" + $env:VCPKG_DEFAULT_TRIPLET = "${vcpkgArchitecture}-osx-release" $vcpkg_triplet_set_by_this_script = $true } } if ($IsMacOS -and (-Not $env:VCPKG_DEFAULT_HOST_TRIPLET)) { if ($BuildDebug) { - $env:VCPKG_DEFAULT_HOST_TRIPLET = "x64-osx" + $env:VCPKG_DEFAULT_HOST_TRIPLET = "${vcpkgArchitecture}-osx" $vcpkg_host_triplet_set_by_this_script = $true } else { - $env:VCPKG_DEFAULT_HOST_TRIPLET = "x64-osx-release" + $env:VCPKG_DEFAULT_HOST_TRIPLET = "${vcpkgArchitecture}-osx-release" $vcpkg_host_triplet_set_by_this_script = $true } } @@ -385,22 +442,22 @@ if ($IsMacOS -and (-Not $env:VCPKG_DEFAULT_HOST_TRIPLET)) { if ($IsLinux -and (-Not $env:VCPKG_DEFAULT_TRIPLET)) { if ($true) { if ($BuildDebug) { - $env:VCPKG_DEFAULT_TRIPLET 
= "x64-linux" + $env:VCPKG_DEFAULT_TRIPLET = "${vcpkgArchitecture}-linux" $vcpkg_triplet_set_by_this_script = $true } else { - $env:VCPKG_DEFAULT_TRIPLET = "x64-linux-release" + $env:VCPKG_DEFAULT_TRIPLET = "${vcpkgArchitecture}-linux-release" $vcpkg_triplet_set_by_this_script = $true } } } if ($IsLinux -and (-Not $env:VCPKG_DEFAULT_HOST_TRIPLET)) { if ($BuildDebug) { - $env:VCPKG_DEFAULT_HOST_TRIPLET = "x64-linux" + $env:VCPKG_DEFAULT_HOST_TRIPLET = "${vcpkgArchitecture}-linux" $vcpkg_host_triplet_set_by_this_script = $true } else { - $env:VCPKG_DEFAULT_HOST_TRIPLET = "x64-linux-release" + $env:VCPKG_DEFAULT_HOST_TRIPLET = "${vcpkgArchitecture}-linux-release" $vcpkg_host_triplet_set_by_this_script = $true } } @@ -630,11 +687,11 @@ if (-Not $DoNotUseNinja) { if (-Not $DoNotSetupVS) { $CL_EXE = Get-Command "cl" -ErrorAction SilentlyContinue | Select-Object -ExpandProperty Definition - if ((-Not $CL_EXE) -or ($CL_EXE -match "HostX86\\x86") -or ($CL_EXE -match "HostX64\\x86")) { + if (-Not $CL_EXE) { $vsfound = getLatestVisualStudioWithDesktopWorkloadPath Write-Host "Found VS in ${vsfound}" Push-Location "${vsfound}/Common7/Tools" - cmd.exe /c "VsDevCmd.bat -arch=x64 & set" | + cmd.exe /c "VsDevCmd.bat -arch=${vsArchitecture} & set" | ForEach-Object { if ($_ -match "=") { $v = $_.split("="); Set-Item -force -path "ENV:\$($v[0])" -value "$($v[1])" @@ -650,26 +707,26 @@ if (-Not $DoNotSetupVS) { $debugConfig = " --config Debug " $releaseConfig = " --config Release " if ($Use32bitTriplet) { - $targetArchitecture = "`"Win32`"" + $targetArchitecture = "`"${vsArchitecture}`"" } else { - $targetArchitecture = "`"x64`"" + $targetArchitecture = "`"${vsArchitecture}`"" } if ($tokens[0] -eq "14") { $generator = "Visual Studio 14 2015" - $AdditionalBuildSetup = $AdditionalBuildSetup + " -T `"host=x64`" -A $targetArchitecture" + $AdditionalBuildSetup = $AdditionalBuildSetup + " -A $targetArchitecture" } elseif ($tokens[0] -eq "15") { $generator = "Visual Studio 15 2017" - 
$AdditionalBuildSetup = $AdditionalBuildSetup + " -T `"host=x64`" -A $targetArchitecture" + $AdditionalBuildSetup = $AdditionalBuildSetup + " -A $targetArchitecture" } elseif ($tokens[0] -eq "16") { $generator = "Visual Studio 16 2019" - $AdditionalBuildSetup = $AdditionalBuildSetup + " -T `"host=x64`" -A $targetArchitecture" + $AdditionalBuildSetup = $AdditionalBuildSetup + " -A $targetArchitecture" } elseif ($tokens[0] -eq "17") { $generator = "Visual Studio 17 2022" - $AdditionalBuildSetup = $AdditionalBuildSetup + " -T `"host=x64`" -A $targetArchitecture" + $AdditionalBuildSetup = $AdditionalBuildSetup + " -A $targetArchitecture" } else { MyThrow("Unknown Visual Studio version, unsupported configuration") @@ -739,7 +796,7 @@ if ($UseVCPKG -And -Not $ForceLocalVCPKG) { if (($null -eq $vcpkg_path) -and $UseVCPKG) { if (-Not (Test-Path "$PWD/vcpkg${VCPKGSuffix}")) { $shallow_copy = "" - if(($ForceOpenCVVersion -eq 0)) { + if($CloneVCPKGShallow) { $shallow_copy = " --depth 1 " } $proc = Start-Process -NoNewWindow -PassThru -FilePath $GIT_EXE -ArgumentList "clone $shallow_copy https://github.com/microsoft/vcpkg vcpkg${VCPKGSuffix}" @@ -850,21 +907,14 @@ if ($BuildInstaller) { if (($ForceOpenCVVersion -eq 2) -and $UseVCPKG) { Write-Host "You requested OpenCV version 2, so vcpkg will install that version" -ForegroundColor Yellow - Write-Host "This requires using vcpkg.json.opencv23 as manifest file" -ForegroundColor Yellow $AdditionalBuildSetup = $AdditionalBuildSetup + " -DVCPKG_USE_OPENCV4=OFF -DVCPKG_USE_OPENCV2=ON" } if (($ForceOpenCVVersion -eq 3) -and $UseVCPKG) { Write-Host "You requested OpenCV version 3, so vcpkg will install that version" -ForegroundColor Yellow - Write-Host "This requires using vcpkg.json.opencv23 as manifest file" -ForegroundColor Yellow $AdditionalBuildSetup = $AdditionalBuildSetup + " -DVCPKG_USE_OPENCV4=OFF -DVCPKG_USE_OPENCV3=ON" } -if($ForceOpenCVVersion -gt 0) { - Move-Item $PSCustomScriptRoot/vcpkg.json 
$PSCustomScriptRoot/vcpkg.json.bak - Move-Item $PSCustomScriptRoot/vcpkg.json.opencv23 $PSCustomScriptRoot/vcpkg.json -} - if ($UseVCPKG -and $ForceVCPKGCacheRemoval) { if ($IsWindows -or $IsWindowsPowerShell) { $vcpkgbinarycachepath = "$env:LOCALAPPDATA/vcpkg/archive" @@ -999,6 +1049,9 @@ else { Remove-Item -Force -Recurse -ErrorAction SilentlyContinue $debug_build_folder } + if (-Not (Test-Path $DebugInstallPrefix)) { + New-Item -Path $DebugInstallPrefix -ItemType directory -Force | Out-Null + } New-Item -Path $debug_build_folder -ItemType directory -Force | Out-Null Set-Location $debug_build_folder $cmake_args = "-G `"$generator`" ${DebugBuildSetup} ${AdditionalBuildSetup} -S .." @@ -1031,6 +1084,9 @@ else { Remove-Item -Force -Recurse -ErrorAction SilentlyContinue $release_build_folder } + if (-Not (Test-Path $ReleaseInstallPrefix)) { + New-Item -Path $ReleaseInstallPrefix -ItemType directory -Force | Out-Null + } New-Item -Path $release_build_folder -ItemType directory -Force | Out-Null Set-Location $release_build_folder $cmake_args = "-G `"$generator`" ${ReleaseBuildSetup} ${AdditionalBuildSetup} -S .." @@ -1056,7 +1112,7 @@ else { if (-Not ($exitCode -eq 0)) { MyThrow("Build failed! Exited with error code $exitCode.") } - if (-Not $UseVCPKG -And -Not $DisableDLLcopy) { + if ($IsWindows -And -Not $UseVCPKG -And -Not $DisableDLLcopy) { $dllfiles = Get-ChildItem ./${dllfolder}/*.dll if ($dllfiles) { Copy-Item $dllfiles .. 
@@ -1112,11 +1168,6 @@ if ($vcpkg_host_triplet_set_by_this_script) { $env:VCPKG_DEFAULT_HOST_TRIPLET = $null } -if($ForceOpenCVVersion -gt 0) { - Move-Item $PSCustomScriptRoot/vcpkg.json $PSCustomScriptRoot/vcpkg.json.opencv23 - Move-Item $PSCustomScriptRoot/vcpkg.json.bak $PSCustomScriptRoot/vcpkg.json -} - if ($vcpkg_branch_set_by_this_script) { Push-Location $vcpkg_path $git_args = "checkout -" diff --git a/docker-compose.yml b/docker-compose.yml index 968fbccaaf9..2eebd876997 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,7 +2,7 @@ version: '2' services: yolo-gpu: - build: + build: context: . dockerfile: Dockerfile.gpu image: yolo:gpu @@ -14,7 +14,7 @@ services: count: 1 capabilities: [gpu] yolo-cpu: - build: + build: context: . dockerfile: Dockerfile.cpu - image: yolo:cpu \ No newline at end of file + image: yolo:cpu diff --git a/scripts/gen_anchors.py b/scripts/gen_anchors.py index 5568952413a..9da6e6e5b20 100644 --- a/scripts/gen_anchors.py +++ b/scripts/gen_anchors.py @@ -110,7 +110,7 @@ def main(argv): args = parser.parse_args() if not os.path.exists(args.output_dir): - os.mkdir(args.output_dir) + os.makedirs(args.output_dir) f = open(args.filelist) diff --git a/scripts/requested_cuda_version.sh b/scripts/requested_cuda_version.sh index ae9ea2a4f55..8cb3f6ad9d6 100755 --- a/scripts/requested_cuda_version.sh +++ b/scripts/requested_cuda_version.sh @@ -1,4 +1,4 @@ #!/usr/bin/env bash -export CUDA_VERSION="12.2" +export CUDA_VERSION="12.6" export CUDA_VERSION_DASHED="${CUDA_VERSION//./-}" diff --git a/scripts/setup.sh b/scripts/setup.sh index e583e972369..6cbc7be8fa1 100755 --- a/scripts/setup.sh +++ b/scripts/setup.sh @@ -75,7 +75,7 @@ elif [[ $(cut -f2 <<< $(lsb_release -i)) == "Ubuntu" ]]; then if [ "$install_tools" = true ] ; then echo "Installing tools" sudo apt-get update - sudo apt-get install -y --no-install-recommends git ninja-build build-essential g++ nasm yasm gperf + sudo apt-get install -y --no-install-recommends git ninja-build 
build-essential g++ nasm yasm gperf sudo apt-get install -y --no-install-recommends apt-transport-https ca-certificates gnupg software-properties-common wget sudo apt-get install -y --no-install-recommends libgles2-mesa-dev libx11-dev libxft-dev libxext-dev libxrandr-dev libxi-dev libxcursor-dev libxdamage-dev libxinerama-dev libdbus-1-dev libxtst-dev wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | sudo tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null @@ -88,7 +88,8 @@ elif [[ $(cut -f2 <<< $(lsb_release -i)) == "Ubuntu" ]]; then sudo apt-get install -y --no-install-recommends cmake sudo apt-get install -y --no-install-recommends powershell sudo apt-get install -y --no-install-recommends curl zip unzip tar - sudo apt-get install -y --no-install-recommends pkg-config autoconf libtool bison + sudo apt-get install -y --no-install-recommends bison + sudo apt-get install -y --no-install-recommends automake autoconf libtool pkg-config autoconf-archive sudo apt-get clean fi else diff --git a/scripts/utils.psm1 b/scripts/utils.psm1 index 928d9339ae8..cf4f0dccf64 100644 --- a/scripts/utils.psm1 +++ b/scripts/utils.psm1 @@ -22,7 +22,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#> -$utils_psm1_version = "0.3.0" +$utils_psm1_version = "1.4.1" $IsWindowsPowerShell = switch ( $PSVersionTable.PSVersion.Major ) { 5 { $true } 4 { $true } @@ -38,6 +38,31 @@ if ($IsWindowsPowerShell -or $IsWindows) { $64bitPwsh = $([Environment]::Is64BitProcess) $64bitOS = $([Environment]::Is64BitOperatingSystem) +$osArchitecture = [System.Runtime.InteropServices.RuntimeInformation]::OSArchitecture +switch ($osArchitecture) { + "X86" { + $vcpkgArchitecture = "x86" + $vsArchitecture = "Win32" + } + "X64" { + $vcpkgArchitecture = "x64" + $vsArchitecture = "x64" + } + "Arm" { + $vcpkgArchitecture = "arm" + $vsArchitecture = "arm" + } + "Arm64" { + $vcpkgArchitecture = "arm64" + $vsArchitecture = "arm64" + } + default { + $vcpkgArchitecture = "x64" + $vsArchitecture = "x64" + Write-Output "Unknown architecture. Trying x64" + } +} + Push-Location $PSScriptRoot $GIT_EXE = Get-Command "git" -ErrorAction SilentlyContinue | Select-Object -ExpandProperty Definition @@ -55,11 +80,36 @@ else { } Pop-Location -$cuda_version_full = "12.2.0" -$cuda_version_short = "12.2" +$cuda_version_full = "12.6.2" +$cuda_version_short = "12.6" $cuda_version_full_dashed = $cuda_version_full.replace('.', '-') $cuda_version_short_dashed = $cuda_version_short.replace('.', '-') +function activateVenv([string]$VenvPath) { + if ($IsWindowsPowerShell -or $IsWindows) { + $activate_script = "$VenvPath/Scripts/Activate.ps1" + } + else { + $activate_script = "$VenvPath/bin/Activate.ps1" + } + + $activate_script = Resolve-Path $activate_script + $VenvPath = Resolve-Path $VenvPath + + if ($env:VIRTUAL_ENV -eq $VenvPath) { + Write-Host "Venv already activated" + return + } + else { + Write-Host "Activating venv" + if (-Not (Test-Path $activate_script)) { + MyThrow("Could not find activate script at $activate_script") + } + & $activate_script + } +} + + function getProgramFiles32bit() { $out = ${env:PROGRAMFILES(X86)} if ($null -eq $out) { @@ -167,18 +217,49 @@ function 
getLatestVisualStudioWithDesktopWorkloadVersion([bool]$required = $true return $installationVersion } +function setupVisualStudio([bool]$required = $true, [bool]$enable_clang = $false) { + $CL_EXE = Get-Command "cl" -ErrorAction SilentlyContinue | Select-Object -ExpandProperty Definition + if (-Not $CL_EXE) { + $vsfound = getLatestVisualStudioWithDesktopWorkloadPath($required) + if (-Not $vsfound) { + if ($required) { + MyThrow("Could not locate any installation of Visual Studio") + } + else { + Write-Host "Could not locate any installation of Visual Studio" -ForegroundColor Red + return + } + } + else { + Write-Host "Found VS in ${vsfound}" + Push-Location "${vsfound}/Common7/Tools" + cmd.exe /c "VsDevCmd.bat -arch=${vsArchitecture} & set" | + ForEach-Object { + if ($_ -match "=") { + $v = $_.split("="); Set-Item -force -path "ENV:\$($v[0])" -value "$($v[1])" + } + } + Pop-Location + if ($enable_clang) { + $env:PATH = "${vsfound}/VC/Tools/Llvm/${vsArchitecture}/bin;$env:PATH" + } + Write-Host "Visual Studio Command Prompt variables set" + } + } +} + function DownloadNinja() { Write-Host "Downloading a portable version of Ninja" -ForegroundColor Yellow Remove-Item -Force -Recurse -ErrorAction SilentlyContinue ninja Remove-Item -Force -ErrorAction SilentlyContinue ninja.zip if ($IsWindows -or $IsWindowsPowerShell) { - $url = "https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-win.zip" + $url = "https://github.com/ninja-build/ninja/releases/download/v1.12.1/ninja-win.zip" } elseif ($IsLinux) { - $url = "https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-linux.zip" + $url = "https://github.com/ninja-build/ninja/releases/download/v1.12.1/ninja-linux.zip" } elseif ($IsMacOS) { - $url = "https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-mac.zip" + $url = "https://github.com/ninja-build/ninja/releases/download/v1.12.1/ninja-mac.zip" } else { MyThrow("Unknown OS, unsupported") @@ -192,12 +273,12 @@ function 
DownloadNinja() { function DownloadAria2() { Write-Host "Downloading a portable version of Aria2" -ForegroundColor Yellow if ($IsWindows -or $IsWindowsPowerShell) { - $basename = "aria2-1.35.0-win-32bit-build1" + $basename = "aria2-1.37.0-win-32bit-build1" $zipName = "${basename}.zip" $outFolder = "$basename/$basename" Remove-Item -Force -Recurse -ErrorAction SilentlyContinue $outFolder Remove-Item -Force -ErrorAction SilentlyContinue $zipName - $url = "https://github.com/aria2/aria2/releases/download/release-1.35.0/$zipName" + $url = "https://github.com/aria2/aria2/releases/download/release-1.37.0/$zipName" Invoke-RestMethod -Uri $url -Method Get -ContentType application/zip -OutFile $zipName Expand-Archive -Path $zipName } @@ -228,6 +309,29 @@ function DownloadAria2() { return "./$outFolder/aria2c${ExecutableSuffix}" } +function DownloadLicencpp() { + $licencpp_version = "0.2.5" + Write-Host "Downloading a portable version of licencpp v${licencpp_version}" -ForegroundColor Yellow + if ($IsWindows -or $IsWindowsPowerShell) { + $basename = "licencpp-Windows" + } + elseif ($IsLinux) { + $basename = "licencpp-Linux" + } + else { + MyThrow("Unknown OS, unsupported") + } + $zipName = "${basename}.zip" + $outFolder = "${basename}" + Remove-Item -Force -Recurse -ErrorAction SilentlyContinue $outFolder + Remove-Item -Force -ErrorAction SilentlyContinue $zipName + $url = "https://github.com/cenit/licencpp/releases/download/v${licencpp_version}/$zipName" + Invoke-RestMethod -Uri $url -Method Get -ContentType application/zip -OutFile $zipName + Expand-Archive -Path $zipName + Remove-Item -Force -ErrorAction SilentlyContinue $zipName + return "./$outFolder/licencpp${ExecutableSuffix}" +} + function Download7Zip() { Write-Host "Downloading a portable version of 7-Zip" -ForegroundColor Yellow if ($IsWindows -or $IsWindowsPowerShell) { @@ -324,6 +428,65 @@ Function MyThrow ($Message) { } } +Function CopyTexFile ($MyFile) { + $MyFileName = Split-Path $MyFile -Leaf + New-Item 
-ItemType Directory -Force -Path "~/${latex_path}" | Out-Null + if (-Not (Test-Path "~/${latex_path}/$MyFileName" )) { + Write-Host "Copying $MyFile to ~/${latex_path}" + Copy-Item "$MyFile" "~/${latex_path}" + } + else { + Write-Host "~/${latex_path}/$MyFileName already present" + } +} + +Function dos2unix { + Param ( + [Parameter(mandatory = $true)] + [string[]]$path + ) + + Get-ChildItem -File -Recurse -Path $path | + ForEach-Object { + Write-Host "Converting $_" + $x = get-content -raw -path $_.fullname; $x -replace "`r`n", "`n" | Set-Content -NoNewline -Force -path $_.fullname + } +} + +Function unix2dos { + Param ( + [Parameter(mandatory = $true)] + [string[]]$path + ) + + Get-ChildItem -File -Recurse -Path $path | + ForEach-Object { + $x = get-content -raw -path $_.fullname + $SearchStr = [regex]::Escape("`r`n") + $SEL = Select-String -InputObject $x -Pattern $SearchStr + if ($null -ne $SEL) { + Write-Host "Converting $_" + # do nothing: avoid creating files containing `r`r`n when using unix2dos twice on the same file + } + else { + Write-Host "Converting $_" + $x -replace "`n", "`r`n" | Set-Content -NoNewline -Force -path $_.fullname + } + } +} + +Function UpdateRepo { + if ($GIT_EXE) { + Get-ChildItem -Directory | + ForEach-Object { + Set-Location $_.Name + git pull + git submodule update --recursive + Set-Location .. 
+ } + } +} + Export-ModuleMember -Variable utils_psm1_version Export-ModuleMember -Variable IsWindowsPowerShell Export-ModuleMember -Variable IsInGitSubmodule @@ -333,10 +496,21 @@ Export-ModuleMember -Variable cuda_version_full Export-ModuleMember -Variable cuda_version_short Export-ModuleMember -Variable cuda_version_full_dashed Export-ModuleMember -Variable cuda_version_short_dashed +Export-ModuleMember -Variable osArchitecture +Export-ModuleMember -Variable vcpkgArchitecture +Export-ModuleMember -Variable vsArchitecture +Export-ModuleMember -Variable ExecutableSuffix +Export-ModuleMember -Function activateVenv Export-ModuleMember -Function getProgramFiles32bit Export-ModuleMember -Function getLatestVisualStudioWithDesktopWorkloadPath Export-ModuleMember -Function getLatestVisualStudioWithDesktopWorkloadVersion +Export-ModuleMember -Function setupVisualStudio Export-ModuleMember -Function DownloadNinja Export-ModuleMember -Function DownloadAria2 Export-ModuleMember -Function Download7Zip +Export-ModuleMember -Function DownloadLicencpp Export-ModuleMember -Function MyThrow +Export-ModuleMember -Function CopyTexFile +Export-ModuleMember -Function dos2unix +Export-ModuleMember -Function unix2dos +Export-ModuleMember -Function UpdateRepo diff --git a/src/layer.c b/src/layer.c index 032a24e0f2c..758644b567b 100644 --- a/src/layer.c +++ b/src/layer.c @@ -92,9 +92,9 @@ void free_layer_custom(layer l, int keep_cudnn_desc) if (l.bias_updates) free(l.bias_updates), l.bias_updates = NULL; if (l.scales) free(l.scales), l.scales = NULL; if (l.scale_updates) free(l.scale_updates), l.scale_updates = NULL; - if (l.biases_ema) free(l.biases_ema), l.biases = NULL; - if (l.scales_ema) free(l.scales_ema), l.scales = NULL; - if (l.weights_ema) free(l.weights_ema), l.weights = NULL; + if (l.biases_ema) free(l.biases_ema), l.biases_ema = NULL; + if (l.scales_ema) free(l.scales_ema), l.scales_ema = NULL; + if (l.weights_ema) free(l.weights_ema), l.weights_ema = NULL; if (l.weights) 
free(l.weights), l.weights = NULL; if (l.weight_updates) free(l.weight_updates), l.weight_updates = NULL; if (l.align_bit_weights) free(l.align_bit_weights); diff --git a/src/yolo_layer.c b/src/yolo_layer.c index ac464ba39b4..de9d0990455 100644 --- a/src/yolo_layer.c +++ b/src/yolo_layer.c @@ -29,6 +29,7 @@ layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int l.classes = classes; l.cost = (float*)xcalloc(1, sizeof(float)); l.biases = (float*)xcalloc(total * 2, sizeof(float)); + l.nbiases = total * 2; if(mask) l.mask = mask; else{ l.mask = (int*)xcalloc(n, sizeof(int)); diff --git a/vcpkg.json b/vcpkg.json index 6f142bed231..752d0fc3669 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -73,54 +73,16 @@ } ] }, - "opencv2-base": { + "opencv2": { "description": "Build darknet with support for OpenCV2", "dependencies": [ - { - "name": "opencv2", - "features": [ - "ffmpeg" - ] - } - ] - }, - "opencv2-cuda": { - "description": "Build darknet with support for CUDA-enabled OpenCV2", - "dependencies": [ - { - "name": "opencv2", - "features": [ - "cuda", - "ffmpeg" - ] - } + "opencv2" ] }, - "opencv3-base": { + "opencv3": { "description": "Build darknet with support for OpenCV3", "dependencies": [ - { - "name": "opencv3", - "features": [ - "contrib", - "dnn", - "ffmpeg" - ] - } - ] - }, - "opencv3-cuda": { - "description": "Build darknet with support for CUDA-enabled OpenCV3", - "dependencies": [ - { - "name": "opencv3", - "features": [ - "contrib", - "cuda", - "dnn", - "ffmpeg" - ] - } + "opencv3" ] } } diff --git a/vcpkg.json.opencv23 b/vcpkg.json.opencv23 deleted file mode 100644 index 0a45be9a456..00000000000 --- a/vcpkg.json.opencv23 +++ /dev/null @@ -1,126 +0,0 @@ -{ - "name": "darknet", - "version": "0.2.5.7", - "description": "Darknet is an open source neural network framework written in C and CUDA. 
You only look once (YOLO) is a state-of-the-art, real-time object detection system, best example of darknet functionalities.", - "homepage": "https://github.com/alexeyab/darknet", - "dependencies": [ - { - "name": "getopt", - "platform": "windows & !mingw" - }, - "pthreads", - "stb" - ], - "features": { - "cuda": { - "description": "Build darknet with support for CUDA", - "dependencies": [ - "cuda" - ] - }, - "cudnn": { - "description": "Build darknet with support for cuDNN", - "dependencies": [ - "cuda", - "cudnn" - ] - }, - "full": { - "description": "Build darknet fully featured", - "dependencies": [ - { - "name": "darknet", - "features": [ - "cuda", - "cudnn", - "opencv-cuda" - ] - } - ] - }, - "opencv-base": { - "description": "Build darknet with support for latest version of OpenCV", - "dependencies": [ - { - "name": "opencv", - "features": [ - "contrib", - "dnn", - "ffmpeg" - ] - } - ] - }, - "opencv-cuda": { - "description": "Build darknet with support for latest version of CUDA-enabled OpenCV", - "dependencies": [ - { - "name": "opencv", - "features": [ - "contrib", - "cuda", - "dnn", - "ffmpeg" - ] - } - ] - }, - "opencv2-base": { - "description": "Build darknet with support for OpenCV2", - "dependencies": [ - { - "name": "opencv2", - "features": [ - "ffmpeg" - ] - } - ] - }, - "opencv2-cuda": { - "description": "Build darknet with support for CUDA-enabled OpenCV2", - "dependencies": [ - { - "name": "opencv2", - "features": [ - "cuda", - "ffmpeg" - ] - } - ] - }, - "opencv3-base": { - "description": "Build darknet with support for OpenCV3", - "dependencies": [ - { - "name": "opencv3", - "features": [ - "contrib", - "dnn", - "ffmpeg" - ] - } - ] - }, - "opencv3-cuda": { - "description": "Build darknet with support for CUDA-enabled OpenCV3", - "dependencies": [ - { - "name": "opencv3", - "features": [ - "contrib", - "cuda", - "dnn", - "ffmpeg" - ] - } - ] - } - }, - "overrides": [ - { - "name": "ffmpeg", - "version": "4.4.3" - } - ], - "builtin-baseline": 
"54cc53c43430c73f489e52af5fadd032c1aced16" -}