From 30113b6853f7ba39f18ebc9154be9850e4bc0f0b Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Fri, 9 Aug 2024 16:08:08 +0200 Subject: [PATCH] Rework the package, and add POCL-based CPU CI (#210) --- .buildkite/pipeline.yml | 8 +- .github/workflows/CI.yml | 37 + Project.toml | 12 +- README.md | 8 +- examples/demo.jl | 6 +- examples/hands_on_opencl/ex04/vadd_chain.jl | 14 +- examples/hands_on_opencl/ex05/vadd_abc.jl | 8 +- examples/hands_on_opencl/ex06/matmul.jl | 6 +- examples/hands_on_opencl/ex07/matmul.jl | 6 +- examples/hands_on_opencl/ex08/matmul.jl | 6 +- examples/hands_on_opencl/ex09/pi_ocl.jl | 2 +- examples/hands_on_opencl/exA/pi_vocl.jl | 2 +- examples/notebooks/Transpose.ipynb | 4 +- examples/notebooks/julia_set_fractal.ipynb | 14 +- examples/notebooks/mandelbrot_fractal.ipynb | 12 +- examples/performance.jl | 6 +- lib/libopencl.jl | 2252 +++++++++++++++++++ res/Project.toml | 4 + res/opencl.toml | 12 + res/opencl_prologue.jl | 18 + res/wrap.jl | 110 + src/OpenCL.jl | 14 +- src/api.jl | 107 +- src/api/opencl_1.0.0.jl | 210 -- src/api/opencl_1.1.0.jl | 69 - src/api/opencl_1.2.0.jl | 72 - src/api/opencl_2.0.0.jl | 55 - src/array.jl | 4 +- src/buffer.jl | 77 +- src/constants.jl | 522 ----- src/context.jl | 84 +- src/device.jl | 126 +- src/error.jl | 2 +- src/event.jl | 116 +- src/kernel.jl | 76 +- src/macros.jl | 25 +- src/memory.jl | 36 +- src/platform.jl | 36 +- src/program.jl | 82 +- src/queue.jl | 44 +- src/types.jl | 171 -- src/util.jl | 12 +- test/Project.toml | 4 + test/array.jl | 52 + test/behaviour.jl | 277 +++ test/buffer.jl | 213 ++ test/cmdqueue.jl | 36 + test/context.jl | 110 + test/device.jl | 87 + test/event.jl | 75 + test/kernel.jl | 210 ++ test/{test_memory.jl => memory.jl} | 14 +- test/minver.jl | 33 + test/platform.jl | 32 + test/program.jl | 78 + test/runtests.jl | 52 +- test/script.gdb | 3 - test/test_array.jl | 60 - test/test_behaviour.jl | 328 --- test/test_buffer.jl | 224 -- test/test_cmdqueue.jl | 47 - test/test_context.jl | 123 - 
test/test_device.jl | 99 - test/test_event.jl | 107 - test/test_kernel.jl | 257 --- test/test_minver.jl | 40 - test/test_platform.jl | 34 - test/test_program.jl | 99 - 68 files changed, 4171 insertions(+), 3010 deletions(-) create mode 100644 .github/workflows/CI.yml create mode 100644 lib/libopencl.jl create mode 100644 res/Project.toml create mode 100644 res/opencl.toml create mode 100644 res/opencl_prologue.jl create mode 100644 res/wrap.jl delete mode 100644 src/api/opencl_1.0.0.jl delete mode 100644 src/api/opencl_1.1.0.jl delete mode 100644 src/api/opencl_1.2.0.jl delete mode 100644 src/api/opencl_2.0.0.jl delete mode 100644 src/constants.jl delete mode 100644 src/types.jl create mode 100644 test/Project.toml create mode 100644 test/array.jl create mode 100644 test/behaviour.jl create mode 100644 test/buffer.jl create mode 100644 test/cmdqueue.jl create mode 100644 test/context.jl create mode 100644 test/device.jl create mode 100644 test/event.jl create mode 100644 test/kernel.jl rename test/{test_memory.jl => memory.jl} (63%) create mode 100644 test/minver.jl create mode 100644 test/platform.jl create mode 100644 test/program.jl delete mode 100644 test/script.gdb delete mode 100644 test/test_array.jl delete mode 100644 test/test_behaviour.jl delete mode 100644 test/test_buffer.jl delete mode 100644 test/test_cmdqueue.jl delete mode 100644 test/test_context.jl delete mode 100644 test/test_device.jl delete mode 100644 test/test_event.jl delete mode 100644 test/test_kernel.jl delete mode 100644 test/test_minver.jl delete mode 100644 test/test_platform.jl delete mode 100644 test/test_program.jl diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index ec69cc3c..471a8954 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -1,8 +1,8 @@ steps: - - label: "Julia 1.6 - CUDA" + - label: "CUDA" plugins: - JuliaCI/julia#v1: - version: "1.6" + version: "1.10" - JuliaCI/julia-test#v1: ~ - JuliaCI/julia-coverage#v1: codecov: true @@ -10,5 +10,7 
@@ steps: queue: "juliagpu" cuda: "*" if: build.message !~ /\[skip tests\]/ - command: "mkdir -p /etc/OpenCL/vendors && echo libnvidia-opencl.so.1 > /etc/OpenCL/vendors/nvidia.icd" + env: + JULIA_OPENCL_BACKEND: "CUDA" + OCL_ICD_FILENAMES: "libnvidia-opencl.so.1" timeout_in_minutes: 60 diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml new file mode 100644 index 00000000..5fce3b27 --- /dev/null +++ b/.github/workflows/CI.yml @@ -0,0 +1,37 @@ +name: CI + +on: + push: + branches: [master] + tags: ["*"] + pull_request: + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + test: + name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + version: ['1.10', 'pre'] + os: ['ubuntu-latest', 'macOS-latest'] + arch: [x64] + steps: + - uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@v2 + with: + version: ${{ matrix.version }} + arch: ${{ matrix.arch }} + - uses: julia-actions/cache@v2 + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-runtest@v1 + - uses: julia-actions/julia-processcoverage@v1 + - uses: codecov/codecov-action@v4 + with: + token: ${{ secrets.CODECOV_TOKEN }} + files: lcov.info diff --git a/Project.toml b/Project.toml index ba58732c..d5cbec32 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "OpenCL" uuid = "08131aa3-fb12-5dee-8b74-c09406e224a2" -version = "0.9.0" +version = "0.10.0" [deps] LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" @@ -8,11 +8,5 @@ OpenCL_jll = "6cb37087-e8b6-5417-8430-1f242f1e46e4" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" [compat] -OpenCL_jll = "2022.9.23" -julia = "1.6" - -[extras] -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" - -[targets] -test = ["Test"] +OpenCL_jll = "2024.5.8" +julia = "1.10" diff --git a/README.md b/README.md index 8aa70b96..31f46f53 100644 --- 
a/README.md +++ b/README.md @@ -46,9 +46,9 @@ b = rand(Float32, 50_000) device, ctx, queue = cl.create_compute_context() -a_buff = cl.Buffer(Float32, ctx, (:r, :copy), hostbuf=a) -b_buff = cl.Buffer(Float32, ctx, (:r, :copy), hostbuf=b) -c_buff = cl.Buffer(Float32, ctx, :w, length(a)) +a_buff = cl.Buffer(Float32, ctx, length(a), (:r, :copy), hostbuf=a) +b_buff = cl.Buffer(Float32, ctx, length(b), (:r, :copy), hostbuf=b) +c_buff = cl.Buffer(Float32, ctx, length(a), :w) p = cl.Program(ctx, source=sum_kernel) |> cl.build! k = cl.Kernel(p, "sum") @@ -146,7 +146,7 @@ Here's a rough translation between the OpenCL API in C to this Julia version. Op | `clGetKernelInfo` | `cl.info(kernel, :symbol)` | Kernel info: `:name`, `:num_args`, `:reference_count`, `:program`, `:attributes` | | `clEnqueueNDRangeKernel` | `cl.enqueue_kernel(queue, kernel, global_work_size)`, `cl.enqueue_kernel(queue, kernel, global_work_size, local_work_size; global_work_offset, wait_on)` | | | `clSetKernelArg` | `cl.set_arg!(kernel, idx, arg)` | `idx` starts at 1 | -| `clCreateUserEvent` | `cl.UserEvent(ctx; retain)` | | +| `clCreateUserEvent` | `cl.UserEvent(ctx; retain)` | | | `clGetEventInfo` | `cl.info(event, :symbol)` | Event info: `:context`, `:command_queue`, `:reference_count`, `:command_type`, `:status`, `:profile_start`, `:profile_end`, `:profile_queued`, `:profile_submit`, `:profile_duration` | `clWaitForEvents` | `cl.wait(event)`, `cl.wait(events)` | | `clEnqueueMarkerWithWaitList` | `cl.enqueue_marker_with_wait_list(queue, wait_for)` | | diff --git a/examples/demo.jl b/examples/demo.jl index 8df0d56e..c67621ed 100644 --- a/examples/demo.jl +++ b/examples/demo.jl @@ -16,9 +16,9 @@ device, ctx, queue = cl.create_compute_context() # create opencl buffer objects # copies to the device initiated when the kernel function is called -a_buff = cl.Buffer(Float32, ctx, (:r, :copy), hostbuf=a) -b_buff = cl.Buffer(Float32, ctx, (:r, :copy), hostbuf=b) -c_buff = cl.Buffer(Float32, ctx, :w, length(a)) 
+a_buff = cl.Buffer(Float32, ctx, length(a), (:r, :copy); hostbuf=a) +b_buff = cl.Buffer(Float32, ctx, length(b), (:r, :copy); hostbuf=b) +c_buff = cl.Buffer(Float32, ctx, length(a), :w) # build the program and construct a kernel object p = cl.Program(ctx, source=sum_kernel_src) |> cl.build! diff --git a/examples/hands_on_opencl/ex04/vadd_chain.jl b/examples/hands_on_opencl/ex04/vadd_chain.jl index 5e994550..6ef817aa 100644 --- a/examples/hands_on_opencl/ex04/vadd_chain.jl +++ b/examples/hands_on_opencl/ex04/vadd_chain.jl @@ -67,14 +67,14 @@ h_g = rand(Float32, LENGTH) # {:use (use host buffer), :alloc (alloc pinned memory), :copy (default)} # Create the input (a, b, e, g) arrays in device memory and copy data from host -d_a = cl.Buffer(Float32, ctx, (:r, :copy), hostbuf=h_a) -d_b = cl.Buffer(Float32, ctx, (:r, :copy), hostbuf=h_b) -d_e = cl.Buffer(Float32, ctx, (:r, :copy), hostbuf=h_e) -d_g = cl.Buffer(Float32, ctx, (:r, :copy), hostbuf=h_g) +d_a = cl.Buffer(Float32, ctx, length(h_a), (:r, :copy), hostbuf=h_a) +d_b = cl.Buffer(Float32, ctx, length(h_b), (:r, :copy), hostbuf=h_b) +d_e = cl.Buffer(Float32, ctx, length(h_e), (:r, :copy), hostbuf=h_e) +d_g = cl.Buffer(Float32, ctx, length(h_g), (:r, :copy), hostbuf=h_g) # Create the output (c, d, f) array in device memory -d_c = cl.Buffer(Float32, ctx, :w, LENGTH) -d_d = cl.Buffer(Float32, ctx, :w, LENGTH) -d_f = cl.Buffer(Float32, ctx, :w, LENGTH) +d_c = cl.Buffer(Float32, ctx, LENGTH, :w) +d_d = cl.Buffer(Float32, ctx, LENGTH, :w) +d_f = cl.Buffer(Float32, ctx, LENGTH, :w) # create the kernel vadd = cl.Kernel(program, "vadd") diff --git a/examples/hands_on_opencl/ex05/vadd_abc.jl b/examples/hands_on_opencl/ex05/vadd_abc.jl index 7dba5deb..9747371b 100644 --- a/examples/hands_on_opencl/ex05/vadd_abc.jl +++ b/examples/hands_on_opencl/ex05/vadd_abc.jl @@ -50,12 +50,12 @@ h_a = rand(Float32, LENGTH) h_b = rand(Float32, LENGTH) h_c = rand(Float32, LENGTH) -d_a = cl.Buffer(Float32, ctx, (:r, :copy), hostbuf=h_a) -d_b = cl.Buffer(Float32, ctx, (:r, :copy), hostbuf=h_b) -d_c = cl.Buffer(Float32, ctx, (:r, :copy), hostbuf=h_c) +d_a = cl.Buffer(Float32, ctx, length(h_a), (:r, :copy), hostbuf=h_a) +d_b =
cl.Buffer(Float32, ctx, (:r, :copy), hostbuf=h_b) -d_c = cl.Buffer(Float32, ctx, (:r, :copy), hostbuf=h_c) +d_a = cl.Buffer(Float32, ctx, length(h_a), (:r, :copy), hostbuf=h_a) +d_b = cl.Buffer(Float32, ctx, length(h_b), (:r, :copy), hostbuf=h_b) +d_c = cl.Buffer(Float32, ctx, length(h_c), (:r, :copy), hostbuf=h_c) # create the output (r) buffer in device memory -d_r = cl.Buffer(Float32, ctx, :w, LENGTH) +d_r = cl.Buffer(Float32, ctx, LENGTH, :w) # create the kernel vadd = cl.Kernel(program, "vadd") diff --git a/examples/hands_on_opencl/ex06/matmul.jl b/examples/hands_on_opencl/ex06/matmul.jl index 07211974..389a4df6 100644 --- a/examples/hands_on_opencl/ex06/matmul.jl +++ b/examples/hands_on_opencl/ex06/matmul.jl @@ -112,9 +112,9 @@ ctx = cl.create_some_context() queue = cl.CmdQueue(ctx, :profile) # create OpenCL Buffers -d_a = cl.Buffer(Float32, ctx, (:r,:copy), hostbuf=h_A) -d_b = cl.Buffer(Float32, ctx, (:r,:copy), hostbuf=h_B) -d_c = cl.Buffer(Float32, ctx, :w, length(h_C)) +d_a = cl.Buffer(Float32, ctx, length(h_A), (:r,:copy), hostbuf=h_A) +d_b = cl.Buffer(Float32, ctx, length(h_B), (:r,:copy), hostbuf=h_B) +d_c = cl.Buffer(Float32, ctx, length(h_C), :w) prg = cl.Program(ctx, source=kernel_source) |> cl.build! 
mmul = cl.Kernel(prg, "mmul") diff --git a/examples/hands_on_opencl/ex07/matmul.jl b/examples/hands_on_opencl/ex07/matmul.jl index 7666f273..6d6347ae 100644 --- a/examples/hands_on_opencl/ex07/matmul.jl +++ b/examples/hands_on_opencl/ex07/matmul.jl @@ -93,9 +93,9 @@ ctx = cl.create_some_context() queue = cl.CmdQueue(ctx, :profile) # create OpenCL Buffers -d_a = cl.Buffer(Float32, ctx, (:r,:copy), hostbuf=h_A) -d_b = cl.Buffer(Float32, ctx, (:r,:copy), hostbuf=h_B) -d_c = cl.Buffer(Float32, ctx, :w, length(h_C)) +d_a = cl.Buffer(Float32, ctx, length(h_A), (:r,:copy), hostbuf=h_A) +d_b = cl.Buffer(Float32, ctx, length(h_B), (:r,:copy), hostbuf=h_B) +d_c = cl.Buffer(Float32, ctx, length(h_C), :w) #-------------------------------------------------------------------------------- # OpenCL matrix multiplication ... Naive diff --git a/examples/hands_on_opencl/ex08/matmul.jl b/examples/hands_on_opencl/ex08/matmul.jl index 324e2e34..b86c8892 100644 --- a/examples/hands_on_opencl/ex08/matmul.jl +++ b/examples/hands_on_opencl/ex08/matmul.jl @@ -93,9 +93,9 @@ ctx = cl.create_some_context() queue = cl.CmdQueue(ctx, :profile) # create OpenCL Buffers -d_a = cl.Buffer(Float32, ctx, (:r,:copy), hostbuf=h_A) -d_b = cl.Buffer(Float32, ctx, (:r,:copy), hostbuf=h_B) -d_c = cl.Buffer(Float32, ctx, :w, length(h_C)) +d_a = cl.Buffer(Float32, ctx, length(h_A), (:r,:copy), hostbuf=h_A) +d_b = cl.Buffer(Float32, ctx, length(h_B), (:r,:copy), hostbuf=h_B) +d_c = cl.Buffer(Float32, ctx, length(h_C), :w) #-------------------------------------------------------------------------------- # OpenCL matrix multiplication ... 
Naive diff --git a/examples/hands_on_opencl/ex09/pi_ocl.jl b/examples/hands_on_opencl/ex09/pi_ocl.jl index 54d60dcc..5dfc9ba6 100644 --- a/examples/hands_on_opencl/ex09/pi_ocl.jl +++ b/examples/hands_on_opencl/ex09/pi_ocl.jl @@ -60,7 +60,7 @@ h_psum = Vector{Float32}(undef, nwork_groups) println("$nwork_groups work groups of size $work_group_size.") println("$nsteps integration steps") -d_partial_sums = cl.Buffer(Float32, ctx, :w, length(h_psum)) +d_partial_sums = cl.Buffer(Float32, ctx, length(h_psum), :w) # start timer rtime = time() diff --git a/examples/hands_on_opencl/exA/pi_vocl.jl b/examples/hands_on_opencl/exA/pi_vocl.jl index 964d71a3..8ff92f5e 100644 --- a/examples/hands_on_opencl/exA/pi_vocl.jl +++ b/examples/hands_on_opencl/exA/pi_vocl.jl @@ -102,7 +102,7 @@ h_psum = Vector{Float32}(undef, nwork_groups) println("$nwork_groups work groups of size $work_group_size.") println("$nsteps integration steps") -d_partial_sums = cl.Buffer(Float32, ctx, :w, length(h_psum)) +d_partial_sums = cl.Buffer(Float32, ctx, length(h_psum), :w) # start timer rtime = time() diff --git a/examples/notebooks/Transpose.ipynb b/examples/notebooks/Transpose.ipynb index c115362b..1929a5a5 100644 --- a/examples/notebooks/Transpose.ipynb +++ b/examples/notebooks/Transpose.ipynb @@ -188,8 +188,8 @@ " (\"block\", enqueue_block_kernel, block_kernel))\n", " for s in array_sizes \n", " src = rand(Float32, (s, s))\n", - " a_buf = cl.Buffer(Float32, ctx, (:r, :copy), hostbuf=src)\n", - " a_t_buf = cl.Buffer(Float32, ctx, :w, length(src))\n", + " a_buf = cl.Buffer(Float32, ctx, length(src), (:r, :copy), hostbuf=src)\n", + " a_t_buf = cl.Buffer(Float32, ctx, length(src), :w)\n", " \n", " # warm up....\n", " for i in 1:4\n", diff --git a/examples/notebooks/julia_set_fractal.ipynb b/examples/notebooks/julia_set_fractal.ipynb index 01896bd3..d5eb7c5f 100644 --- a/examples/notebooks/julia_set_fractal.ipynb +++ b/examples/notebooks/julia_set_fractal.ipynb @@ -295,16 +295,16 @@ "julia_source = 
\"\n", "\n", "__kernel void julia(__global float2 *q,\n", - " __global ushort *output, \n", + " __global ushort *output,\n", " ushort const maxiter)\n", "{\n", " int gid = get_global_id(0);\n", " float nreal = 0;\n", " float real = q[gid].x;\n", " float imag = q[gid].y;\n", - " \n", + "\n", " output[gid] = 0;\n", - " \n", + "\n", " for(int curiter = 0; curiter < maxiter; curiter++) {\n", " if (real*real + imag*imag > 4.0f) {\n", " output[gid] = curiter;\n", @@ -341,15 +341,15 @@ "\n", " out = Array{UInt16}(size(q))\n", "\n", - " q_buff = cl.Buffer(Complex64, ctx, (:r, :copy), hostbuf=q)\n", - " o_buff = cl.Buffer(UInt16, ctx, :w, length(out))\n", + " q_buff = cl.Buffer(Complex64, ctx, length(q), (:r, :copy), hostbuf=q)\n", + " o_buff = cl.Buffer(UInt16, ctx, length(out), :w)\n", "\n", " prg = cl.Program(ctx, source=julia_source) |> cl.build!\n", " k = cl.Kernel(prg, \"julia\")\n", - " \n", + "\n", " queue(k, length(q), nothing, q_buff, o_buff, UInt16(maxiter))\n", " cl.copy!(queue, out, o_buff)\n", - " \n", + "\n", " return out\n", "end\n" ] diff --git a/examples/notebooks/mandelbrot_fractal.ipynb b/examples/notebooks/mandelbrot_fractal.ipynb index 8320d241..e8a55018 100644 --- a/examples/notebooks/mandelbrot_fractal.ipynb +++ b/examples/notebooks/mandelbrot_fractal.ipynb @@ -55,7 +55,7 @@ "source": [ "mandel_source = \"\n", "__kernel void mandelbrot(__global float2 *q,\n", - " __global ushort *output, \n", + " __global ushort *output,\n", " ushort const maxiter)\n", "{\n", " int gid = get_global_id(0);\n", @@ -98,18 +98,18 @@ "\n", " out = Array{UInt16}(size(q))\n", "\n", - " q_buff = cl.Buffer(Complex64, ctx, (:r, :copy), hostbuf=q)\n", - " o_buff = cl.Buffer(UInt16, ctx, :w, length(out))\n", + " q_buff = cl.Buffer(Complex64, ctx, length(q), (:r, :copy), hostbuf=q)\n", + " o_buff = cl.Buffer(UInt16, ctx, length(out), :w)\n", "\n", " prg = cl.Program(ctx, source=mandel_source) |> cl.build!\n", - " \n", + "\n", " k = cl.Kernel(prg, \"mandelbrot\")\n", " 
#cl.call(queue, k, length(out), nothing, q_buff, o_buff, uint16(maxiter))\n", " queue(k, length(q), nothing, q_buff, o_buff, UInt16(maxiter))\n", "\n", "\n", " cl.copy!(queue, out, o_buff)\n", - " \n", + "\n", " return out\n", "end" ] @@ -150,7 +150,7 @@ " y2 = 1.0\n", " x1 = -1.5\n", " x2 = 0.5\n", - " \n", + "\n", " q = Array{Complex64}(h, w)\n", " for x in 1:w\n", " for y in 1:h\n", diff --git a/examples/performance.jl b/examples/performance.jl index 397586c7..6cb3f62f 100644 --- a/examples/performance.jl +++ b/examples/performance.jl @@ -79,9 +79,9 @@ function cl_performance(ndatapts::Integer, nworkers::Integer) ctx = cl.Context(device) queue = cl.CmdQueue(ctx, :profile) - a_buf = cl.Buffer(Float32, ctx, (:r, :copy), hostbuf=a) - b_buf = cl.Buffer(Float32, ctx, (:r, :copy), hostbuf=b) - c_buf = cl.Buffer(Float32, ctx, :w, length(a)) + a_buf = cl.Buffer(Float32, ctx, length(a), (:r, :copy), hostbuf=a) + b_buf = cl.Buffer(Float32, ctx, length(b), (:r, :copy), hostbuf=b) + c_buf = cl.Buffer(Float32, ctx, length(a), :w) prg = cl.Program(ctx, source=bench_kernel) |> cl.build! 
kern = cl.Kernel(prg, "sum") diff --git a/lib/libopencl.jl b/lib/libopencl.jl new file mode 100644 index 00000000..361af362 --- /dev/null +++ b/lib/libopencl.jl @@ -0,0 +1,2252 @@ +# outlined functionality to avoid GC frame allocation +@noinline function throw_api_error(res) + throw(CLError(res)) +end + +function check(f) + res = retry_reclaim(err -> err == CL_OUT_OF_RESOURCES || + err == CL_MEM_OBJECT_ALLOCATION_FAILURE || + err == CL_OUT_OF_HOST_MEMORY) do + return f() + end + + if res != CL_SUCCESS + throw_api_error(res) + end + + return +end + +const intptr_t = Clong + +const cl_int = Int32 + +const cl_uint = UInt32 + +const cl_ulong = UInt64 + +const cl_GLuint = Cuint + +const cl_GLint = Cint + +const cl_GLenum = Cuint + +mutable struct _cl_platform_id end + +mutable struct _cl_device_id end + +mutable struct _cl_context end + +mutable struct _cl_command_queue end + +mutable struct _cl_mem end + +mutable struct _cl_program end + +mutable struct _cl_kernel end + +mutable struct _cl_event end + +mutable struct _cl_sampler end + +const cl_platform_id = Ptr{_cl_platform_id} + +const cl_device_id = Ptr{_cl_device_id} + +const cl_context = Ptr{_cl_context} + +const cl_command_queue = Ptr{_cl_command_queue} + +const cl_mem = Ptr{_cl_mem} + +const cl_program = Ptr{_cl_program} + +const cl_kernel = Ptr{_cl_kernel} + +const cl_event = Ptr{_cl_event} + +const cl_sampler = Ptr{_cl_sampler} + +const cl_bool = cl_uint + +const cl_bitfield = cl_ulong + +const cl_properties = cl_ulong + +const cl_device_type = cl_bitfield + +const cl_platform_info = cl_uint + +const cl_device_info = cl_uint + +const cl_device_fp_config = cl_bitfield + +const cl_device_mem_cache_type = cl_uint + +const cl_device_local_mem_type = cl_uint + +const cl_device_exec_capabilities = cl_bitfield + +const cl_device_svm_capabilities = cl_bitfield + +const cl_command_queue_properties = cl_bitfield + +const cl_device_partition_property = intptr_t + +const cl_device_affinity_domain = cl_bitfield + +const 
cl_context_properties = intptr_t + +const cl_context_info = cl_uint + +const cl_queue_properties = cl_properties + +const cl_command_queue_info = cl_uint + +const cl_channel_order = cl_uint + +const cl_channel_type = cl_uint + +const cl_mem_flags = cl_bitfield + +const cl_svm_mem_flags = cl_bitfield + +const cl_mem_object_type = cl_uint + +const cl_mem_info = cl_uint + +const cl_mem_migration_flags = cl_bitfield + +const cl_image_info = cl_uint + +const cl_buffer_create_type = cl_uint + +const cl_addressing_mode = cl_uint + +const cl_filter_mode = cl_uint + +const cl_sampler_info = cl_uint + +const cl_map_flags = cl_bitfield + +const cl_pipe_properties = intptr_t + +const cl_pipe_info = cl_uint + +const cl_program_info = cl_uint + +const cl_program_build_info = cl_uint + +const cl_program_binary_type = cl_uint + +const cl_build_status = cl_int + +const cl_kernel_info = cl_uint + +const cl_kernel_arg_info = cl_uint + +const cl_kernel_arg_address_qualifier = cl_uint + +const cl_kernel_arg_access_qualifier = cl_uint + +const cl_kernel_arg_type_qualifier = cl_bitfield + +const cl_kernel_work_group_info = cl_uint + +const cl_kernel_sub_group_info = cl_uint + +const cl_event_info = cl_uint + +const cl_command_type = cl_uint + +const cl_profiling_info = cl_uint + +const cl_sampler_properties = cl_properties + +const cl_kernel_exec_info = cl_uint + +const cl_device_atomic_capabilities = cl_bitfield + +const cl_device_device_enqueue_capabilities = cl_bitfield + +const cl_khronos_vendor_id = cl_uint + +const cl_mem_properties = cl_properties + +const cl_version = cl_uint + +struct _cl_image_format + image_channel_order::cl_channel_order + image_channel_data_type::cl_channel_type +end + +const cl_image_format = _cl_image_format + +struct _cl_image_desc + data::NTuple{72,UInt8} +end + +function Base.getproperty(x::Ptr{_cl_image_desc}, f::Symbol) + f === :image_type && return Ptr{cl_mem_object_type}(x + 0) + f === :image_width && return Ptr{Csize_t}(x + 8) + f === :image_height 
&& return Ptr{Csize_t}(x + 16) + f === :image_depth && return Ptr{Csize_t}(x + 24) + f === :image_array_size && return Ptr{Csize_t}(x + 32) + f === :image_row_pitch && return Ptr{Csize_t}(x + 40) + f === :image_slice_pitch && return Ptr{Csize_t}(x + 48) + f === :num_mip_levels && return Ptr{cl_uint}(x + 56) + f === :num_samples && return Ptr{cl_uint}(x + 60) + f === :buffer && return Ptr{cl_mem}(x + 64) + f === :mem_object && return Ptr{cl_mem}(x + 64) + return getfield(x, f) +end + +function Base.getproperty(x::_cl_image_desc, f::Symbol) + r = Ref{_cl_image_desc}(x) + ptr = Base.unsafe_convert(Ptr{_cl_image_desc}, r) + fptr = getproperty(ptr, f) + GC.@preserve r unsafe_load(fptr) +end + +function Base.setproperty!(x::Ptr{_cl_image_desc}, f::Symbol, v) + return unsafe_store!(getproperty(x, f), v) +end + +const cl_image_desc = _cl_image_desc + +struct _cl_buffer_region + origin::Csize_t + size::Csize_t +end + +const cl_buffer_region = _cl_buffer_region + +struct _cl_name_version + version::cl_version + name::NTuple{64,Cchar} +end + +const cl_name_version = _cl_name_version + +@checked function clGetPlatformIDs(num_entries, platforms, num_platforms) + @ccall libopencl.clGetPlatformIDs(num_entries::cl_uint, platforms::Ptr{cl_platform_id}, + num_platforms::Ptr{cl_uint})::cl_int +end + +@checked function clGetPlatformInfo(platform, param_name, param_value_size, param_value, + param_value_size_ret) + @ccall libopencl.clGetPlatformInfo(platform::cl_platform_id, + param_name::cl_platform_info, + param_value_size::Csize_t, param_value::Ptr{Cvoid}, + param_value_size_ret::Ptr{Csize_t})::cl_int +end + +@checked function clGetDeviceIDs(platform, device_type, num_entries, devices, num_devices) + @ccall libopencl.clGetDeviceIDs(platform::cl_platform_id, device_type::cl_device_type, + num_entries::cl_uint, devices::Ptr{cl_device_id}, + num_devices::Ptr{cl_uint})::cl_int +end + +@checked function clGetDeviceInfo(device, param_name, param_value_size, param_value, + 
param_value_size_ret) + @ccall libopencl.clGetDeviceInfo(device::cl_device_id, param_name::cl_device_info, + param_value_size::Csize_t, param_value::Ptr{Cvoid}, + param_value_size_ret::Ptr{Csize_t})::cl_int +end + +@checked function clCreateSubDevices(in_device, properties, num_devices, out_devices, + num_devices_ret) + @ccall libopencl.clCreateSubDevices(in_device::cl_device_id, + properties::Ptr{cl_device_partition_property}, + num_devices::cl_uint, + out_devices::Ptr{cl_device_id}, + num_devices_ret::Ptr{cl_uint})::cl_int +end + +@checked function clRetainDevice(device) + @ccall libopencl.clRetainDevice(device::cl_device_id)::cl_int +end + +@checked function clReleaseDevice(device) + @ccall libopencl.clReleaseDevice(device::cl_device_id)::cl_int +end + +@checked function clSetDefaultDeviceCommandQueue(context, device, command_queue) + @ccall libopencl.clSetDefaultDeviceCommandQueue(context::cl_context, + device::cl_device_id, + command_queue::cl_command_queue)::cl_int +end + +@checked function clGetDeviceAndHostTimer(device, device_timestamp, host_timestamp) + @ccall libopencl.clGetDeviceAndHostTimer(device::cl_device_id, + device_timestamp::Ptr{cl_ulong}, + host_timestamp::Ptr{cl_ulong})::cl_int +end + +@checked function clGetHostTimer(device, host_timestamp) + @ccall libopencl.clGetHostTimer(device::cl_device_id, + host_timestamp::Ptr{cl_ulong})::cl_int +end + +function clCreateContext(properties, num_devices, devices, pfn_notify, user_data, + errcode_ret) + @ccall libopencl.clCreateContext(properties::Ptr{cl_context_properties}, + num_devices::cl_uint, devices::Ptr{cl_device_id}, + pfn_notify::Ptr{Cvoid}, user_data::Ptr{Cvoid}, + errcode_ret::Ptr{cl_int})::cl_context +end + +function clCreateContextFromType(properties, device_type, pfn_notify, user_data, + errcode_ret) + @ccall libopencl.clCreateContextFromType(properties::Ptr{cl_context_properties}, + device_type::cl_device_type, + pfn_notify::Ptr{Cvoid}, user_data::Ptr{Cvoid}, + 
errcode_ret::Ptr{cl_int})::cl_context +end + +@checked function clRetainContext(context) + @ccall libopencl.clRetainContext(context::cl_context)::cl_int +end + +@checked function clReleaseContext(context) + @ccall libopencl.clReleaseContext(context::cl_context)::cl_int +end + +@checked function clGetContextInfo(context, param_name, param_value_size, param_value, + param_value_size_ret) + @ccall libopencl.clGetContextInfo(context::cl_context, param_name::cl_context_info, + param_value_size::Csize_t, param_value::Ptr{Cvoid}, + param_value_size_ret::Ptr{Csize_t})::cl_int +end + +@checked function clSetContextDestructorCallback(context, pfn_notify, user_data) + @ccall libopencl.clSetContextDestructorCallback(context::cl_context, + pfn_notify::Ptr{Cvoid}, + user_data::Ptr{Cvoid})::cl_int +end + +function clCreateCommandQueueWithProperties(context, device, properties, errcode_ret) + @ccall libopencl.clCreateCommandQueueWithProperties(context::cl_context, + device::cl_device_id, + properties::Ptr{cl_queue_properties}, + errcode_ret::Ptr{cl_int})::cl_command_queue +end + +@checked function clRetainCommandQueue(command_queue) + @ccall libopencl.clRetainCommandQueue(command_queue::cl_command_queue)::cl_int +end + +@checked function clReleaseCommandQueue(command_queue) + @ccall libopencl.clReleaseCommandQueue(command_queue::cl_command_queue)::cl_int +end + +@checked function clGetCommandQueueInfo(command_queue, param_name, param_value_size, + param_value, param_value_size_ret) + @ccall libopencl.clGetCommandQueueInfo(command_queue::cl_command_queue, + param_name::cl_command_queue_info, + param_value_size::Csize_t, + param_value::Ptr{Cvoid}, + param_value_size_ret::Ptr{Csize_t})::cl_int +end + +function clCreateBuffer(context, flags, size, host_ptr, errcode_ret) + @ccall libopencl.clCreateBuffer(context::cl_context, flags::cl_mem_flags, size::Csize_t, + host_ptr::Ptr{Cvoid}, errcode_ret::Ptr{cl_int})::cl_mem +end + +function clCreateSubBuffer(buffer, flags, buffer_create_type, 
buffer_create_info, + errcode_ret) + @ccall libopencl.clCreateSubBuffer(buffer::cl_mem, flags::cl_mem_flags, + buffer_create_type::cl_buffer_create_type, + buffer_create_info::Ptr{Cvoid}, + errcode_ret::Ptr{cl_int})::cl_mem +end + +function clCreateImage(context, flags, image_format, image_desc, host_ptr, errcode_ret) + @ccall libopencl.clCreateImage(context::cl_context, flags::cl_mem_flags, + image_format::Ptr{cl_image_format}, + image_desc::Ptr{cl_image_desc}, host_ptr::Ptr{Cvoid}, + errcode_ret::Ptr{cl_int})::cl_mem +end + +function clCreatePipe(context, flags, pipe_packet_size, pipe_max_packets, properties, + errcode_ret) + @ccall libopencl.clCreatePipe(context::cl_context, flags::cl_mem_flags, + pipe_packet_size::cl_uint, pipe_max_packets::cl_uint, + properties::Ptr{cl_pipe_properties}, + errcode_ret::Ptr{cl_int})::cl_mem +end + +function clCreateBufferWithProperties(context, properties, flags, size, host_ptr, + errcode_ret) + @ccall libopencl.clCreateBufferWithProperties(context::cl_context, + properties::Ptr{cl_mem_properties}, + flags::cl_mem_flags, size::Csize_t, + host_ptr::Ptr{Cvoid}, + errcode_ret::Ptr{cl_int})::cl_mem +end + +function clCreateImageWithProperties(context, properties, flags, image_format, image_desc, + host_ptr, errcode_ret) + @ccall libopencl.clCreateImageWithProperties(context::cl_context, + properties::Ptr{cl_mem_properties}, + flags::cl_mem_flags, + image_format::Ptr{cl_image_format}, + image_desc::Ptr{cl_image_desc}, + host_ptr::Ptr{Cvoid}, + errcode_ret::Ptr{cl_int})::cl_mem +end + +@checked function clRetainMemObject(memobj) + @ccall libopencl.clRetainMemObject(memobj::cl_mem)::cl_int +end + +@checked function clReleaseMemObject(memobj) + @ccall libopencl.clReleaseMemObject(memobj::cl_mem)::cl_int +end + +@checked function clGetSupportedImageFormats(context, flags, image_type, num_entries, + image_formats, num_image_formats) + @ccall libopencl.clGetSupportedImageFormats(context::cl_context, flags::cl_mem_flags, + 
image_type::cl_mem_object_type, + num_entries::cl_uint, + image_formats::Ptr{cl_image_format}, + num_image_formats::Ptr{cl_uint})::cl_int +end + +@checked function clGetMemObjectInfo(memobj, param_name, param_value_size, param_value, + param_value_size_ret) + @ccall libopencl.clGetMemObjectInfo(memobj::cl_mem, param_name::cl_mem_info, + param_value_size::Csize_t, param_value::Ptr{Cvoid}, + param_value_size_ret::Ptr{Csize_t})::cl_int +end + +@checked function clGetImageInfo(image, param_name, param_value_size, param_value, + param_value_size_ret) + @ccall libopencl.clGetImageInfo(image::cl_mem, param_name::cl_image_info, + param_value_size::Csize_t, param_value::Ptr{Cvoid}, + param_value_size_ret::Ptr{Csize_t})::cl_int +end + +@checked function clGetPipeInfo(pipe, param_name, param_value_size, param_value, + param_value_size_ret) + @ccall libopencl.clGetPipeInfo(pipe::cl_mem, param_name::cl_pipe_info, + param_value_size::Csize_t, param_value::Ptr{Cvoid}, + param_value_size_ret::Ptr{Csize_t})::cl_int +end + +@checked function clSetMemObjectDestructorCallback(memobj, pfn_notify, user_data) + @ccall libopencl.clSetMemObjectDestructorCallback(memobj::cl_mem, + pfn_notify::Ptr{Cvoid}, + user_data::Ptr{Cvoid})::cl_int +end + +function clSVMAlloc(context, flags, size, alignment) + @ccall libopencl.clSVMAlloc(context::cl_context, flags::cl_svm_mem_flags, size::Csize_t, + alignment::cl_uint)::Ptr{Cvoid} +end + +function clSVMFree(context, svm_pointer) + @ccall libopencl.clSVMFree(context::cl_context, svm_pointer::Ptr{Cvoid})::Cvoid +end + +function clCreateSamplerWithProperties(context, sampler_properties, errcode_ret) + @ccall libopencl.clCreateSamplerWithProperties(context::cl_context, + sampler_properties::Ptr{cl_sampler_properties}, + errcode_ret::Ptr{cl_int})::cl_sampler +end + +@checked function clRetainSampler(sampler) + @ccall libopencl.clRetainSampler(sampler::cl_sampler)::cl_int +end + +@checked function clReleaseSampler(sampler) + @ccall 
libopencl.clReleaseSampler(sampler::cl_sampler)::cl_int
+end
+# NOTE(review): each wrapper below forwards its arguments unchanged to a single @ccall into libopencl. This looks like generator output (the patch also adds res/wrap.jl) -- confirm before hand-editing.
+@checked function clGetSamplerInfo(sampler, param_name, param_value_size, param_value,
+        param_value_size_ret)
+    @ccall libopencl.clGetSamplerInfo(sampler::cl_sampler, param_name::cl_sampler_info,
+        param_value_size::Csize_t, param_value::Ptr{Cvoid},
+        param_value_size_ret::Ptr{Csize_t})::cl_int
+end
+# No @checked on the clCreateProgram* wrappers: they return a cl_program rather than a cl_int, and the only status channel visible here is the errcode_ret pointer.
+function clCreateProgramWithSource(context, count, strings, lengths, errcode_ret)
+    @ccall libopencl.clCreateProgramWithSource(context::cl_context, count::cl_uint,
+        strings::Ptr{Ptr{Cchar}},
+        lengths::Ptr{Csize_t},
+        errcode_ret::Ptr{cl_int})::cl_program
+end
+
+function clCreateProgramWithBinary(context, num_devices, device_list, lengths, binaries,
+        binary_status, errcode_ret)
+    @ccall libopencl.clCreateProgramWithBinary(context::cl_context, num_devices::cl_uint,
+        device_list::Ptr{cl_device_id},
+        lengths::Ptr{Csize_t},
+        binaries::Ptr{Ptr{Cuchar}},
+        binary_status::Ptr{cl_int},
+        errcode_ret::Ptr{cl_int})::cl_program
+end
+
+function clCreateProgramWithBuiltInKernels(context, num_devices, device_list, kernel_names,
+        errcode_ret)
+    @ccall libopencl.clCreateProgramWithBuiltInKernels(context::cl_context,
+        num_devices::cl_uint,
+        device_list::Ptr{cl_device_id},
+        kernel_names::Ptr{Cchar},
+        errcode_ret::Ptr{cl_int})::cl_program
+end
+
+function clCreateProgramWithIL(context, il, length, errcode_ret)
+    @ccall libopencl.clCreateProgramWithIL(context::cl_context, il::Ptr{Cvoid},
+        length::Csize_t,
+        errcode_ret::Ptr{cl_int})::cl_program
+end
+# Every @checked wrapper in this chunk returns cl_int. The macro itself is defined outside this chunk (presumably it turns a non-success status into an error -- TODO confirm).
+@checked function clRetainProgram(program)
+    @ccall libopencl.clRetainProgram(program::cl_program)::cl_int
+end
+
+@checked function clReleaseProgram(program)
+    @ccall libopencl.clReleaseProgram(program::cl_program)::cl_int
+end
+
+@checked function clBuildProgram(program, num_devices, device_list, options, pfn_notify,
+        user_data)
+    @ccall libopencl.clBuildProgram(program::cl_program, num_devices::cl_uint,
+        device_list::Ptr{cl_device_id}, options::Ptr{Cchar},
+        pfn_notify::Ptr{Cvoid}, user_data::Ptr{Cvoid})::cl_int
+end
+
+@checked function clCompileProgram(program, num_devices, device_list, options,
+        num_input_headers, input_headers, header_include_names,
+        pfn_notify, user_data)
+    @ccall libopencl.clCompileProgram(program::cl_program, num_devices::cl_uint,
+        device_list::Ptr{cl_device_id}, options::Ptr{Cchar},
+        num_input_headers::cl_uint,
+        input_headers::Ptr{cl_program},
+        header_include_names::Ptr{Ptr{Cchar}},
+        pfn_notify::Ptr{Cvoid}, user_data::Ptr{Cvoid})::cl_int
+end
+# Not @checked: returns a cl_program; status is available only through errcode_ret.
+function clLinkProgram(context, num_devices, device_list, options, num_input_programs,
+        input_programs, pfn_notify, user_data, errcode_ret)
+    @ccall libopencl.clLinkProgram(context::cl_context, num_devices::cl_uint,
+        device_list::Ptr{cl_device_id}, options::Ptr{Cchar},
+        num_input_programs::cl_uint,
+        input_programs::Ptr{cl_program}, pfn_notify::Ptr{Cvoid},
+        user_data::Ptr{Cvoid},
+        errcode_ret::Ptr{cl_int})::cl_program
+end
+
+@checked function clSetProgramReleaseCallback(program, pfn_notify, user_data)
+    @ccall libopencl.clSetProgramReleaseCallback(program::cl_program,
+        pfn_notify::Ptr{Cvoid},
+        user_data::Ptr{Cvoid})::cl_int
+end
+
+@checked function clSetProgramSpecializationConstant(program, spec_id, spec_size,
+        spec_value)
+    @ccall libopencl.clSetProgramSpecializationConstant(program::cl_program,
+        spec_id::cl_uint,
+        spec_size::Csize_t,
+        spec_value::Ptr{Cvoid})::cl_int
+end
+
+@checked function clUnloadPlatformCompiler(platform)
+    @ccall libopencl.clUnloadPlatformCompiler(platform::cl_platform_id)::cl_int
+end
+
+@checked function clGetProgramInfo(program, param_name, param_value_size, param_value,
+        param_value_size_ret)
+    @ccall libopencl.clGetProgramInfo(program::cl_program, param_name::cl_program_info,
+        param_value_size::Csize_t, param_value::Ptr{Cvoid},
+        param_value_size_ret::Ptr{Csize_t})::cl_int
+end
+
+@checked function clGetProgramBuildInfo(program, device, param_name, param_value_size,
+        param_value, param_value_size_ret)
+    @ccall libopencl.clGetProgramBuildInfo(program::cl_program, device::cl_device_id,
+        param_name::cl_program_build_info,
+        param_value_size::Csize_t,
+        param_value::Ptr{Cvoid},
+        param_value_size_ret::Ptr{Csize_t})::cl_int
+end
+
+function clCreateKernel(program, kernel_name, errcode_ret)
+    @ccall libopencl.clCreateKernel(program::cl_program, kernel_name::Ptr{Cchar},
+        errcode_ret::Ptr{cl_int})::cl_kernel
+end
+
+@checked function clCreateKernelsInProgram(program, num_kernels, kernels, num_kernels_ret)
+    @ccall libopencl.clCreateKernelsInProgram(program::cl_program, num_kernels::cl_uint,
+        kernels::Ptr{cl_kernel},
+        num_kernels_ret::Ptr{cl_uint})::cl_int
+end
+
+function clCloneKernel(source_kernel, errcode_ret)
+    @ccall libopencl.clCloneKernel(source_kernel::cl_kernel,
+        errcode_ret::Ptr{cl_int})::cl_kernel
+end
+
+@checked function clRetainKernel(kernel)
+    @ccall libopencl.clRetainKernel(kernel::cl_kernel)::cl_int
+end
+
+@checked function clReleaseKernel(kernel)
+    @ccall libopencl.clReleaseKernel(kernel::cl_kernel)::cl_int
+end
+
+@checked function clSetKernelArg(kernel, arg_index, arg_size, arg_value)
+    @ccall libopencl.clSetKernelArg(kernel::cl_kernel, arg_index::cl_uint,
+        arg_size::Csize_t, arg_value::Ptr{Cvoid})::cl_int
+end
+
+@checked function clSetKernelArgSVMPointer(kernel, arg_index, arg_value)
+    @ccall libopencl.clSetKernelArgSVMPointer(kernel::cl_kernel, arg_index::cl_uint,
+        arg_value::Ptr{Cvoid})::cl_int
+end
+
+@checked function clSetKernelExecInfo(kernel, param_name, param_value_size, param_value)
+    @ccall libopencl.clSetKernelExecInfo(kernel::cl_kernel, param_name::cl_kernel_exec_info,
+        param_value_size::Csize_t,
+        param_value::Ptr{Cvoid})::cl_int
+end
+
+@checked function clGetKernelInfo(kernel, param_name, param_value_size, param_value,
+        param_value_size_ret)
+    @ccall libopencl.clGetKernelInfo(kernel::cl_kernel, param_name::cl_kernel_info,
+        param_value_size::Csize_t, param_value::Ptr{Cvoid},
+        param_value_size_ret::Ptr{Csize_t})::cl_int
+end
+
+@checked function clGetKernelArgInfo(kernel, arg_indx, param_name, param_value_size,
+        param_value, param_value_size_ret)
+    @ccall libopencl.clGetKernelArgInfo(kernel::cl_kernel, arg_indx::cl_uint,
+        param_name::cl_kernel_arg_info,
+        param_value_size::Csize_t, param_value::Ptr{Cvoid},
+        param_value_size_ret::Ptr{Csize_t})::cl_int
+end
+
+@checked function clGetKernelWorkGroupInfo(kernel, device, param_name, param_value_size,
+        param_value, param_value_size_ret)
+    @ccall libopencl.clGetKernelWorkGroupInfo(kernel::cl_kernel, device::cl_device_id,
+        param_name::cl_kernel_work_group_info,
+        param_value_size::Csize_t,
+        param_value::Ptr{Cvoid},
+        param_value_size_ret::Ptr{Csize_t})::cl_int
+end
+
+@checked function clGetKernelSubGroupInfo(kernel, device, param_name, input_value_size,
+        input_value, param_value_size, param_value,
+        param_value_size_ret)
+    @ccall libopencl.clGetKernelSubGroupInfo(kernel::cl_kernel, device::cl_device_id,
+        param_name::cl_kernel_sub_group_info,
+        input_value_size::Csize_t,
+        input_value::Ptr{Cvoid},
+        param_value_size::Csize_t,
+        param_value::Ptr{Cvoid},
+        param_value_size_ret::Ptr{Csize_t})::cl_int
+end
+
+@checked function clWaitForEvents(num_events, event_list)
+    @ccall libopencl.clWaitForEvents(num_events::cl_uint, event_list::Ptr{cl_event})::cl_int
+end
+
+@checked function clGetEventInfo(event, param_name, param_value_size, param_value,
+        param_value_size_ret)
+    @ccall libopencl.clGetEventInfo(event::cl_event, param_name::cl_event_info,
+        param_value_size::Csize_t, param_value::Ptr{Cvoid},
+        param_value_size_ret::Ptr{Csize_t})::cl_int
+end
+
+function clCreateUserEvent(context, errcode_ret)
+    @ccall libopencl.clCreateUserEvent(context::cl_context,
+        errcode_ret::Ptr{cl_int})::cl_event
+end
+
+@checked function clRetainEvent(event)
+    @ccall libopencl.clRetainEvent(event::cl_event)::cl_int
+end
+
+@checked function clReleaseEvent(event)
+    @ccall libopencl.clReleaseEvent(event::cl_event)::cl_int
+end
+
+@checked function clSetUserEventStatus(event, execution_status)
+    @ccall libopencl.clSetUserEventStatus(event::cl_event, execution_status::cl_int)::cl_int
+end
+
+@checked function clSetEventCallback(event, command_exec_callback_type, pfn_notify,
+        user_data)
+    @ccall libopencl.clSetEventCallback(event::cl_event, command_exec_callback_type::cl_int,
+        pfn_notify::Ptr{Cvoid},
+        user_data::Ptr{Cvoid})::cl_int
+end
+
+@checked function clGetEventProfilingInfo(event, param_name, param_value_size, param_value,
+        param_value_size_ret)
+    @ccall libopencl.clGetEventProfilingInfo(event::cl_event, param_name::cl_profiling_info,
+        param_value_size::Csize_t,
+        param_value::Ptr{Cvoid},
+        param_value_size_ret::Ptr{Csize_t})::cl_int
+end
+
+@checked function clFlush(command_queue)
+    @ccall libopencl.clFlush(command_queue::cl_command_queue)::cl_int
+end
+
+@checked function clFinish(command_queue)
+    @ccall libopencl.clFinish(command_queue::cl_command_queue)::cl_int
+end
+
+@checked function clEnqueueReadBuffer(command_queue, buffer, blocking_read, offset, size,
+        ptr, num_events_in_wait_list, event_wait_list, event)
+    @ccall libopencl.clEnqueueReadBuffer(command_queue::cl_command_queue, buffer::cl_mem,
+        blocking_read::cl_bool, offset::Csize_t,
+        size::Csize_t, ptr::Ptr{Cvoid},
+        num_events_in_wait_list::cl_uint,
+        event_wait_list::Ptr{cl_event},
+        event::Ptr{cl_event})::cl_int
+end
+
+@checked function clEnqueueReadBufferRect(command_queue, buffer, blocking_read,
+        buffer_origin, host_origin, region,
+        buffer_row_pitch, buffer_slice_pitch,
+        host_row_pitch, host_slice_pitch, ptr,
+        num_events_in_wait_list, event_wait_list, event)
+    @ccall libopencl.clEnqueueReadBufferRect(command_queue::cl_command_queue,
+        buffer::cl_mem, blocking_read::cl_bool,
+        buffer_origin::Ptr{Csize_t},
+        host_origin::Ptr{Csize_t},
+        region::Ptr{Csize_t},
+        buffer_row_pitch::Csize_t,
+        buffer_slice_pitch::Csize_t,
+        host_row_pitch::Csize_t,
+        host_slice_pitch::Csize_t, ptr::Ptr{Cvoid},
+        num_events_in_wait_list::cl_uint,
+        event_wait_list::Ptr{cl_event},
+        event::Ptr{cl_event})::cl_int
+end
+
+@checked function clEnqueueWriteBuffer(command_queue, buffer, blocking_write, offset, size,
+        ptr, num_events_in_wait_list, event_wait_list, event)
+    @ccall libopencl.clEnqueueWriteBuffer(command_queue::cl_command_queue, buffer::cl_mem,
+        blocking_write::cl_bool, offset::Csize_t,
+        size::Csize_t, ptr::Ptr{Cvoid},
+        num_events_in_wait_list::cl_uint,
+        event_wait_list::Ptr{cl_event},
+        event::Ptr{cl_event})::cl_int
+end
+
+@checked function clEnqueueWriteBufferRect(command_queue, buffer, blocking_write,
+        buffer_origin, host_origin, region,
+        buffer_row_pitch, buffer_slice_pitch,
+        host_row_pitch, host_slice_pitch, ptr,
+        num_events_in_wait_list, event_wait_list, event)
+    @ccall libopencl.clEnqueueWriteBufferRect(command_queue::cl_command_queue,
+        buffer::cl_mem, blocking_write::cl_bool,
+        buffer_origin::Ptr{Csize_t},
+        host_origin::Ptr{Csize_t},
+        region::Ptr{Csize_t},
+        buffer_row_pitch::Csize_t,
+        buffer_slice_pitch::Csize_t,
+        host_row_pitch::Csize_t,
+        host_slice_pitch::Csize_t, ptr::Ptr{Cvoid},
+        num_events_in_wait_list::cl_uint,
+        event_wait_list::Ptr{cl_event},
+        event::Ptr{cl_event})::cl_int
+end
+
+@checked function clEnqueueFillBuffer(command_queue, buffer, pattern, pattern_size, offset,
+        size, num_events_in_wait_list, event_wait_list, event)
+    @ccall libopencl.clEnqueueFillBuffer(command_queue::cl_command_queue, buffer::cl_mem,
+        pattern::Ptr{Cvoid}, pattern_size::Csize_t,
+        offset::Csize_t, size::Csize_t,
+        num_events_in_wait_list::cl_uint,
+        event_wait_list::Ptr{cl_event},
+        event::Ptr{cl_event})::cl_int
+end
+
+@checked function clEnqueueCopyBuffer(command_queue, src_buffer, dst_buffer, src_offset,
+        dst_offset, size, num_events_in_wait_list,
+        event_wait_list, event)
+    @ccall libopencl.clEnqueueCopyBuffer(command_queue::cl_command_queue,
+        src_buffer::cl_mem, dst_buffer::cl_mem,
+        src_offset::Csize_t, dst_offset::Csize_t,
+        size::Csize_t, num_events_in_wait_list::cl_uint,
+        event_wait_list::Ptr{cl_event},
+        event::Ptr{cl_event})::cl_int
+end
+
+@checked function clEnqueueCopyBufferRect(command_queue, src_buffer, dst_buffer, src_origin,
+        dst_origin, region, src_row_pitch,
+        src_slice_pitch, dst_row_pitch, dst_slice_pitch,
+        num_events_in_wait_list, event_wait_list, event)
+    @ccall libopencl.clEnqueueCopyBufferRect(command_queue::cl_command_queue,
+        src_buffer::cl_mem, dst_buffer::cl_mem,
+        src_origin::Ptr{Csize_t},
+        dst_origin::Ptr{Csize_t}, region::Ptr{Csize_t},
+        src_row_pitch::Csize_t,
+        src_slice_pitch::Csize_t,
+        dst_row_pitch::Csize_t,
+        dst_slice_pitch::Csize_t,
+        num_events_in_wait_list::cl_uint,
+        event_wait_list::Ptr{cl_event},
+        event::Ptr{cl_event})::cl_int
+end
+
+@checked function clEnqueueReadImage(command_queue, image, blocking_read, origin, region,
+        row_pitch, slice_pitch, ptr, num_events_in_wait_list,
+        event_wait_list, event)
+    @ccall libopencl.clEnqueueReadImage(command_queue::cl_command_queue, image::cl_mem,
+        blocking_read::cl_bool, origin::Ptr{Csize_t},
+        region::Ptr{Csize_t}, row_pitch::Csize_t,
+        slice_pitch::Csize_t, ptr::Ptr{Cvoid},
+        num_events_in_wait_list::cl_uint,
+        event_wait_list::Ptr{cl_event},
+        event::Ptr{cl_event})::cl_int
+end
+
+@checked function clEnqueueWriteImage(command_queue, image, blocking_write, origin, region,
+        input_row_pitch, input_slice_pitch, ptr,
+        num_events_in_wait_list, event_wait_list, event)
+    @ccall libopencl.clEnqueueWriteImage(command_queue::cl_command_queue, image::cl_mem,
+        blocking_write::cl_bool, origin::Ptr{Csize_t},
+        region::Ptr{Csize_t}, input_row_pitch::Csize_t,
+        input_slice_pitch::Csize_t, ptr::Ptr{Cvoid},
+        num_events_in_wait_list::cl_uint,
+        event_wait_list::Ptr{cl_event},
+        event::Ptr{cl_event})::cl_int
+end
+
+@checked function clEnqueueFillImage(command_queue, image, fill_color, origin, region,
+        num_events_in_wait_list, event_wait_list, event)
+    @ccall libopencl.clEnqueueFillImage(command_queue::cl_command_queue, image::cl_mem,
+        fill_color::Ptr{Cvoid}, origin::Ptr{Csize_t},
+        region::Ptr{Csize_t},
+        num_events_in_wait_list::cl_uint,
+        event_wait_list::Ptr{cl_event},
+        event::Ptr{cl_event})::cl_int
+end
+
+@checked function clEnqueueCopyImage(command_queue, src_image, dst_image, src_origin,
+        dst_origin, region, num_events_in_wait_list,
+        event_wait_list, event)
+    @ccall libopencl.clEnqueueCopyImage(command_queue::cl_command_queue, src_image::cl_mem,
+        dst_image::cl_mem, src_origin::Ptr{Csize_t},
+        dst_origin::Ptr{Csize_t}, region::Ptr{Csize_t},
+        num_events_in_wait_list::cl_uint,
+        event_wait_list::Ptr{cl_event},
+        event::Ptr{cl_event})::cl_int
+end
+
+@checked function clEnqueueCopyImageToBuffer(command_queue, src_image, dst_buffer,
+        src_origin, region, dst_offset,
+        num_events_in_wait_list, event_wait_list,
+        event)
+    @ccall libopencl.clEnqueueCopyImageToBuffer(command_queue::cl_command_queue,
+        src_image::cl_mem, dst_buffer::cl_mem,
+        src_origin::Ptr{Csize_t},
+        region::Ptr{Csize_t}, dst_offset::Csize_t,
+        num_events_in_wait_list::cl_uint,
+        event_wait_list::Ptr{cl_event},
+        event::Ptr{cl_event})::cl_int
+end
+
+@checked function clEnqueueCopyBufferToImage(command_queue, src_buffer, dst_image,
+        src_offset, dst_origin, region,
+        num_events_in_wait_list, event_wait_list,
+        event)
+    @ccall libopencl.clEnqueueCopyBufferToImage(command_queue::cl_command_queue,
+        src_buffer::cl_mem, dst_image::cl_mem,
+        src_offset::Csize_t,
+        dst_origin::Ptr{Csize_t},
+        region::Ptr{Csize_t},
+        num_events_in_wait_list::cl_uint,
+        event_wait_list::Ptr{cl_event},
+        event::Ptr{cl_event})::cl_int
+end
+# The two map wrappers are not @checked: they return the call's raw Ptr{Cvoid}, and status is available only through errcode_ret.
+function clEnqueueMapBuffer(command_queue, buffer, blocking_map, map_flags, offset, size,
+        num_events_in_wait_list, event_wait_list, event, errcode_ret)
+    @ccall libopencl.clEnqueueMapBuffer(command_queue::cl_command_queue, buffer::cl_mem,
+        blocking_map::cl_bool, map_flags::cl_map_flags,
+        offset::Csize_t, size::Csize_t,
+        num_events_in_wait_list::cl_uint,
+        event_wait_list::Ptr{cl_event},
+        event::Ptr{cl_event},
+        errcode_ret::Ptr{cl_int})::Ptr{Cvoid}
+end
+
+function clEnqueueMapImage(command_queue, image, blocking_map, map_flags, origin, region,
+        image_row_pitch, image_slice_pitch, num_events_in_wait_list,
+        event_wait_list, event, errcode_ret)
+    @ccall libopencl.clEnqueueMapImage(command_queue::cl_command_queue, image::cl_mem,
+        blocking_map::cl_bool, map_flags::cl_map_flags,
+        origin::Ptr{Csize_t}, region::Ptr{Csize_t},
+        image_row_pitch::Ptr{Csize_t},
+        image_slice_pitch::Ptr{Csize_t},
+        num_events_in_wait_list::cl_uint,
+        event_wait_list::Ptr{cl_event}, event::Ptr{cl_event},
+        errcode_ret::Ptr{cl_int})::Ptr{Cvoid}
+end
+
+@checked function clEnqueueUnmapMemObject(command_queue, memobj, mapped_ptr,
+        num_events_in_wait_list, event_wait_list, event)
+    @ccall libopencl.clEnqueueUnmapMemObject(command_queue::cl_command_queue,
+        memobj::cl_mem, mapped_ptr::Ptr{Cvoid},
+        num_events_in_wait_list::cl_uint,
+        event_wait_list::Ptr{cl_event},
+        event::Ptr{cl_event})::cl_int
+end
+
+@checked function clEnqueueMigrateMemObjects(command_queue, num_mem_objects, mem_objects,
+        flags, num_events_in_wait_list,
+        event_wait_list, event)
+    @ccall libopencl.clEnqueueMigrateMemObjects(command_queue::cl_command_queue,
+        num_mem_objects::cl_uint,
+        mem_objects::Ptr{cl_mem},
+        flags::cl_mem_migration_flags,
+        num_events_in_wait_list::cl_uint,
+        event_wait_list::Ptr{cl_event},
+        event::Ptr{cl_event})::cl_int
+end
+
+@checked function clEnqueueNDRangeKernel(command_queue, kernel, work_dim,
+        global_work_offset, global_work_size,
+        local_work_size, num_events_in_wait_list,
+        event_wait_list, event)
+    @ccall libopencl.clEnqueueNDRangeKernel(command_queue::cl_command_queue,
+        kernel::cl_kernel, work_dim::cl_uint,
+        global_work_offset::Ptr{Csize_t},
+        global_work_size::Ptr{Csize_t},
+        local_work_size::Ptr{Csize_t},
+        num_events_in_wait_list::cl_uint,
+        event_wait_list::Ptr{cl_event},
+        event::Ptr{cl_event})::cl_int
+end
+
+@checked function clEnqueueNativeKernel(command_queue, user_func, args, cb_args,
+        num_mem_objects, mem_list, args_mem_loc,
+        num_events_in_wait_list, event_wait_list, event)
+    @ccall libopencl.clEnqueueNativeKernel(command_queue::cl_command_queue,
+        user_func::Ptr{Cvoid}, args::Ptr{Cvoid},
+        cb_args::Csize_t, num_mem_objects::cl_uint,
+        mem_list::Ptr{cl_mem},
+        args_mem_loc::Ptr{Ptr{Cvoid}},
+        num_events_in_wait_list::cl_uint,
+        event_wait_list::Ptr{cl_event},
+        event::Ptr{cl_event})::cl_int
+end
+
+@checked function clEnqueueMarkerWithWaitList(command_queue, num_events_in_wait_list,
+        event_wait_list, event)
+    @ccall libopencl.clEnqueueMarkerWithWaitList(command_queue::cl_command_queue,
+        num_events_in_wait_list::cl_uint,
+        event_wait_list::Ptr{cl_event},
+        event::Ptr{cl_event})::cl_int
+end
+
+@checked function clEnqueueBarrierWithWaitList(command_queue, num_events_in_wait_list,
+        event_wait_list, event)
+    @ccall libopencl.clEnqueueBarrierWithWaitList(command_queue::cl_command_queue,
+        num_events_in_wait_list::cl_uint,
+        event_wait_list::Ptr{cl_event},
+        event::Ptr{cl_event})::cl_int
+end
+
+@checked function clEnqueueSVMFree(command_queue, num_svm_pointers, svm_pointers,
+        pfn_free_func, user_data, num_events_in_wait_list,
+        event_wait_list, event)
+    @ccall libopencl.clEnqueueSVMFree(command_queue::cl_command_queue,
+        num_svm_pointers::cl_uint,
+        svm_pointers::Ptr{Ptr{Cvoid}},
+        pfn_free_func::Ptr{Cvoid}, user_data::Ptr{Cvoid},
+        num_events_in_wait_list::cl_uint,
+        event_wait_list::Ptr{cl_event},
+        event::Ptr{cl_event})::cl_int
+end
+
+@checked function clEnqueueSVMMemcpy(command_queue, blocking_copy, dst_ptr, src_ptr, size,
+        num_events_in_wait_list, event_wait_list, event)
+    @ccall libopencl.clEnqueueSVMMemcpy(command_queue::cl_command_queue,
+        blocking_copy::cl_bool, dst_ptr::Ptr{Cvoid},
+        src_ptr::Ptr{Cvoid}, size::Csize_t,
+        num_events_in_wait_list::cl_uint,
+        event_wait_list::Ptr{cl_event},
+        event::Ptr{cl_event})::cl_int
+end
+
+@checked function clEnqueueSVMMemFill(command_queue, svm_ptr, pattern, pattern_size, size,
+        num_events_in_wait_list, event_wait_list, event)
+    @ccall libopencl.clEnqueueSVMMemFill(command_queue::cl_command_queue,
+        svm_ptr::Ptr{Cvoid}, pattern::Ptr{Cvoid},
+        pattern_size::Csize_t, size::Csize_t,
+        num_events_in_wait_list::cl_uint,
+        event_wait_list::Ptr{cl_event},
+        event::Ptr{cl_event})::cl_int
+end
+
+@checked function clEnqueueSVMMap(command_queue, blocking_map, flags, svm_ptr, size,
+        num_events_in_wait_list, event_wait_list, event)
+    @ccall libopencl.clEnqueueSVMMap(command_queue::cl_command_queue, blocking_map::cl_bool,
+        flags::cl_map_flags, svm_ptr::Ptr{Cvoid},
+        size::Csize_t, num_events_in_wait_list::cl_uint,
+        event_wait_list::Ptr{cl_event},
+        event::Ptr{cl_event})::cl_int
+end
+
+@checked function clEnqueueSVMUnmap(command_queue, svm_ptr, num_events_in_wait_list,
+        event_wait_list, event)
+    @ccall libopencl.clEnqueueSVMUnmap(command_queue::cl_command_queue, svm_ptr::Ptr{Cvoid},
+        num_events_in_wait_list::cl_uint,
+        event_wait_list::Ptr{cl_event},
+        event::Ptr{cl_event})::cl_int
+end
+
+@checked function clEnqueueSVMMigrateMem(command_queue, num_svm_pointers, svm_pointers,
+        sizes, flags, num_events_in_wait_list,
+        event_wait_list, event)
+    @ccall libopencl.clEnqueueSVMMigrateMem(command_queue::cl_command_queue,
+        num_svm_pointers::cl_uint,
+        svm_pointers::Ptr{Ptr{Cvoid}},
+        sizes::Ptr{Csize_t},
+        flags::cl_mem_migration_flags,
+        num_events_in_wait_list::cl_uint,
+        event_wait_list::Ptr{cl_event},
+        event::Ptr{cl_event})::cl_int
+end
+# Returns a bare Ptr{Cvoid} and takes no errcode_ret, so no status of any kind reaches the caller through this wrapper.
+function clGetExtensionFunctionAddressForPlatform(platform, func_name)
+    @ccall libopencl.clGetExtensionFunctionAddressForPlatform(platform::cl_platform_id,
+        func_name::Ptr{Cchar})::Ptr{Cvoid}
+end
+
+function clCreateImage2D(context, flags, image_format, image_width, image_height,
+        image_row_pitch, host_ptr, errcode_ret)
+    @ccall libopencl.clCreateImage2D(context::cl_context, flags::cl_mem_flags,
+        image_format::Ptr{cl_image_format},
+        image_width::Csize_t, image_height::Csize_t,
+        image_row_pitch::Csize_t, host_ptr::Ptr{Cvoid},
+        errcode_ret::Ptr{cl_int})::cl_mem
+end
+
+function clCreateImage3D(context, flags, image_format, image_width, image_height,
+        image_depth, image_row_pitch, image_slice_pitch, host_ptr,
+        errcode_ret)
+    @ccall libopencl.clCreateImage3D(context::cl_context, flags::cl_mem_flags,
+        image_format::Ptr{cl_image_format},
+        image_width::Csize_t, image_height::Csize_t,
+        image_depth::Csize_t, image_row_pitch::Csize_t,
+        image_slice_pitch::Csize_t, host_ptr::Ptr{Cvoid},
+        errcode_ret::Ptr{cl_int})::cl_mem
+end
+
+@checked function clEnqueueMarker(command_queue, event)
+    @ccall libopencl.clEnqueueMarker(command_queue::cl_command_queue,
+        event::Ptr{cl_event})::cl_int
+end
+
+@checked function clEnqueueWaitForEvents(command_queue, num_events, event_list)
+    @ccall libopencl.clEnqueueWaitForEvents(command_queue::cl_command_queue,
+        num_events::cl_uint,
+        event_list::Ptr{cl_event})::cl_int
+end
+
+@checked function clEnqueueBarrier(command_queue)
+    @ccall libopencl.clEnqueueBarrier(command_queue::cl_command_queue)::cl_int
+end
+
+@checked function clUnloadCompiler()
+    @ccall libopencl.clUnloadCompiler()::cl_int
+end
+
+function clGetExtensionFunctionAddress(func_name)
+    @ccall libopencl.clGetExtensionFunctionAddress(func_name::Ptr{Cchar})::Ptr{Cvoid}
+end
+
+function clCreateCommandQueue(context, device, properties, errcode_ret)
+    @ccall libopencl.clCreateCommandQueue(context::cl_context, device::cl_device_id,
+        properties::cl_command_queue_properties,
+        errcode_ret::Ptr{cl_int})::cl_command_queue
+end
+
+function clCreateSampler(context, normalized_coords, addressing_mode, filter_mode,
+        errcode_ret)
+    @ccall libopencl.clCreateSampler(context::cl_context, normalized_coords::cl_bool,
+        addressing_mode::cl_addressing_mode,
+        filter_mode::cl_filter_mode,
+        errcode_ret::Ptr{cl_int})::cl_sampler
+end
+
+@checked function clEnqueueTask(command_queue, kernel, num_events_in_wait_list,
+        event_wait_list, event)
+    @ccall libopencl.clEnqueueTask(command_queue::cl_command_queue, kernel::cl_kernel,
+        num_events_in_wait_list::cl_uint,
+        event_wait_list::Ptr{cl_event},
+        event::Ptr{cl_event})::cl_int
+end
+# Four aliases of cl_uint; the first three appear as @ccall argument types in the GL wrappers further down.
+const cl_gl_context_info = cl_uint
+
+const cl_gl_object_type = cl_uint
+
+const cl_gl_texture_info = cl_uint
+
+const cl_gl_platform_info = cl_uint
+# Each C function type quoted in a "# typedef" comment is aliased to Cvoid, which makes every matching *_fn alias a Ptr{Cvoid}.
+# typedef cl_int CL_API_CALL clGetGLContextInfoKHR_t ( const cl_context_properties * properties , cl_gl_context_info param_name , size_t param_value_size , void * param_value , size_t * param_value_size_ret )
+const clGetGLContextInfoKHR_t = Cvoid
+
+# typedef clGetGLContextInfoKHR_t * clGetGLContextInfoKHR_fn
+const clGetGLContextInfoKHR_fn = Ptr{clGetGLContextInfoKHR_t}
+
+# typedef cl_mem CL_API_CALL clCreateFromGLBuffer_t ( cl_context context , cl_mem_flags flags , cl_GLuint bufobj , cl_int * errcode_ret )
+const clCreateFromGLBuffer_t = Cvoid
+
+# typedef clCreateFromGLBuffer_t * clCreateFromGLBuffer_fn
+const clCreateFromGLBuffer_fn = Ptr{clCreateFromGLBuffer_t}
+
+@checked function clGetGLContextInfoKHR(properties, param_name, param_value_size,
+        param_value, param_value_size_ret)
+    @ccall libopencl.clGetGLContextInfoKHR(properties::Ptr{cl_context_properties},
+        param_name::cl_gl_context_info,
+        param_value_size::Csize_t,
+        param_value::Ptr{Cvoid},
+        param_value_size_ret::Ptr{Csize_t})::cl_int
+end
+
+function clCreateFromGLBuffer(context, flags, bufobj, errcode_ret)
+    @ccall libopencl.clCreateFromGLBuffer(context::cl_context, flags::cl_mem_flags,
+        bufobj::cl_GLuint,
+        errcode_ret::Ptr{cl_int})::cl_mem
+end
+
+# typedef cl_mem CL_API_CALL clCreateFromGLTexture_t ( cl_context context , cl_mem_flags flags , cl_GLenum target , cl_GLint miplevel , cl_GLuint texture , cl_int * errcode_ret )
+const clCreateFromGLTexture_t = Cvoid
+
+# typedef clCreateFromGLTexture_t * clCreateFromGLTexture_fn
+const clCreateFromGLTexture_fn = Ptr{clCreateFromGLTexture_t}
+
+function clCreateFromGLTexture(context, flags, target, miplevel, texture, errcode_ret)
+    @ccall libopencl.clCreateFromGLTexture(context::cl_context, flags::cl_mem_flags,
+        target::cl_GLenum, miplevel::cl_GLint,
+        texture::cl_GLuint,
+        errcode_ret::Ptr{cl_int})::cl_mem
+end
+
+# typedef cl_mem CL_API_CALL clCreateFromGLRenderbuffer_t ( cl_context context , cl_mem_flags flags , cl_GLuint renderbuffer , cl_int * errcode_ret )
+const clCreateFromGLRenderbuffer_t = Cvoid
+
+# typedef clCreateFromGLRenderbuffer_t * clCreateFromGLRenderbuffer_fn
+const clCreateFromGLRenderbuffer_fn = Ptr{clCreateFromGLRenderbuffer_t}
+
+# typedef cl_int CL_API_CALL clGetGLObjectInfo_t ( cl_mem memobj , cl_gl_object_type * gl_object_type , cl_GLuint * gl_object_name )
+const clGetGLObjectInfo_t = Cvoid
+
+# typedef clGetGLObjectInfo_t * clGetGLObjectInfo_fn
+const clGetGLObjectInfo_fn = Ptr{clGetGLObjectInfo_t}
+
+# typedef cl_int CL_API_CALL clGetGLTextureInfo_t ( cl_mem memobj , cl_gl_texture_info param_name , size_t param_value_size , void * param_value , size_t * param_value_size_ret )
+const clGetGLTextureInfo_t = Cvoid
+
+# typedef clGetGLTextureInfo_t * clGetGLTextureInfo_fn
+const clGetGLTextureInfo_fn = Ptr{clGetGLTextureInfo_t}
+
+# typedef cl_int CL_API_CALL clEnqueueAcquireGLObjects_t ( cl_command_queue command_queue , cl_uint num_objects , const cl_mem * mem_objects , cl_uint num_events_in_wait_list , const cl_event * event_wait_list , cl_event * event )
+const clEnqueueAcquireGLObjects_t = Cvoid
+
+# typedef clEnqueueAcquireGLObjects_t * clEnqueueAcquireGLObjects_fn
+const clEnqueueAcquireGLObjects_fn = Ptr{clEnqueueAcquireGLObjects_t}
+
+# typedef cl_int CL_API_CALL clEnqueueReleaseGLObjects_t ( cl_command_queue command_queue , cl_uint num_objects , const cl_mem * mem_objects , cl_uint num_events_in_wait_list , const cl_event * event_wait_list , cl_event * event )
+const clEnqueueReleaseGLObjects_t = Cvoid
+
+# typedef clEnqueueReleaseGLObjects_t * clEnqueueReleaseGLObjects_fn
+const clEnqueueReleaseGLObjects_fn = Ptr{clEnqueueReleaseGLObjects_t}
+
+function clCreateFromGLRenderbuffer(context, flags, renderbuffer, errcode_ret)
+    @ccall libopencl.clCreateFromGLRenderbuffer(context::cl_context, flags::cl_mem_flags,
+        renderbuffer::cl_GLuint,
+        errcode_ret::Ptr{cl_int})::cl_mem
+end
+
+@checked function clGetGLObjectInfo(memobj, gl_object_type, gl_object_name)
+    @ccall libopencl.clGetGLObjectInfo(memobj::cl_mem,
+        gl_object_type::Ptr{cl_gl_object_type},
+        gl_object_name::Ptr{cl_GLuint})::cl_int
+end
+
+@checked function clGetGLTextureInfo(memobj, param_name, param_value_size, param_value,
+        param_value_size_ret)
+    @ccall libopencl.clGetGLTextureInfo(memobj::cl_mem, param_name::cl_gl_texture_info,
+        param_value_size::Csize_t, param_value::Ptr{Cvoid},
+        param_value_size_ret::Ptr{Csize_t})::cl_int
+end
+
+@checked function clEnqueueAcquireGLObjects(command_queue, num_objects, mem_objects,
+        num_events_in_wait_list, event_wait_list, event)
+    @ccall libopencl.clEnqueueAcquireGLObjects(command_queue::cl_command_queue,
+        num_objects::cl_uint,
+        mem_objects::Ptr{cl_mem},
+        num_events_in_wait_list::cl_uint,
+        event_wait_list::Ptr{cl_event},
+        event::Ptr{cl_event})::cl_int
+end
+
+@checked function clEnqueueReleaseGLObjects(command_queue, num_objects, mem_objects,
+        num_events_in_wait_list, event_wait_list, event)
+    @ccall libopencl.clEnqueueReleaseGLObjects(command_queue::cl_command_queue,
+        num_objects::cl_uint,
+        mem_objects::Ptr{cl_mem},
+        num_events_in_wait_list::cl_uint,
+        event_wait_list::Ptr{cl_event},
+        event::Ptr{cl_event})::cl_int
+end
+
+# typedef cl_mem CL_API_CALL clCreateFromGLTexture2D_t ( cl_context context , cl_mem_flags flags , cl_GLenum target , cl_GLint miplevel , cl_GLuint texture , cl_int * errcode_ret )
+const clCreateFromGLTexture2D_t = Cvoid
+
+# typedef clCreateFromGLTexture2D_t * clCreateFromGLTexture2D_fn
+const clCreateFromGLTexture2D_fn = Ptr{clCreateFromGLTexture2D_t}
+
+# typedef cl_mem CL_API_CALL clCreateFromGLTexture3D_t ( cl_context context , cl_mem_flags flags , cl_GLenum target , cl_GLint miplevel , cl_GLuint texture , cl_int * errcode_ret )
+const clCreateFromGLTexture3D_t = Cvoid
+
+# typedef clCreateFromGLTexture3D_t * clCreateFromGLTexture3D_fn
+const clCreateFromGLTexture3D_fn = Ptr{clCreateFromGLTexture3D_t}
+
+function clCreateFromGLTexture2D(context, flags, target, miplevel, texture, errcode_ret)
+    @ccall libopencl.clCreateFromGLTexture2D(context::cl_context, flags::cl_mem_flags,
+        target::cl_GLenum, miplevel::cl_GLint,
+        texture::cl_GLuint,
+        errcode_ret::Ptr{cl_int})::cl_mem
+end
+
+function clCreateFromGLTexture3D(context, flags, target, miplevel, texture, errcode_ret)
+    @ccall libopencl.clCreateFromGLTexture3D(context::cl_context, flags::cl_mem_flags,
+        target::cl_GLenum, miplevel::cl_GLint,
+        texture::cl_GLuint,
+        errcode_ret::Ptr{cl_int})::cl_mem
+end
+# Field-less struct; in this chunk it serves only as the pointee type of the cl_GLsync alias below.
+mutable struct __GLsync end
+
+const cl_GLsync = Ptr{__GLsync}
+
+# typedef cl_event CL_API_CALL clCreateEventFromGLsyncKHR_t ( cl_context context , cl_GLsync sync , cl_int * errcode_ret )
+const clCreateEventFromGLsyncKHR_t = Cvoid
+
+# typedef clCreateEventFromGLsyncKHR_t * clCreateEventFromGLsyncKHR_fn
+const clCreateEventFromGLsyncKHR_fn = Ptr{clCreateEventFromGLsyncKHR_t}
+
+function clCreateEventFromGLsyncKHR(context, sync, errcode_ret)
+    @ccall libopencl.clCreateEventFromGLsyncKHR(context::cl_context, sync::cl_GLsync,
+        errcode_ret::Ptr{cl_int})::cl_event
+end
+
+# typedef cl_int CL_API_CALL clGetSupportedGLTextureFormatsINTEL_t ( cl_context context , cl_mem_flags flags , cl_mem_object_type image_type , cl_uint num_entries , cl_GLenum * gl_formats , cl_uint * num_texture_formats )
+const clGetSupportedGLTextureFormatsINTEL_t = Cvoid
+
+# typedef clGetSupportedGLTextureFormatsINTEL_t * clGetSupportedGLTextureFormatsINTEL_fn
+const clGetSupportedGLTextureFormatsINTEL_fn = Ptr{clGetSupportedGLTextureFormatsINTEL_t}
+
+@checked function clGetSupportedGLTextureFormatsINTEL(context, flags, image_type, # forwards all six arguments to one @ccall that returns cl_int
+        num_entries, gl_formats,
+        num_texture_formats)
+    @ccall libopencl.clGetSupportedGLTextureFormatsINTEL(context::cl_context,
+        flags::cl_mem_flags,
+        image_type::cl_mem_object_type,
+        num_entries::cl_uint,
+        gl_formats::Ptr{cl_GLenum},
+        num_texture_formats::Ptr{cl_uint})::cl_int
+end
+# Plain constants from here on. Julia types each literal by its spelling: decimal values and `1 << n` are Int, 0x0900-style literals are UInt16, and 0xffffffff is UInt32.
+const CL_NAME_VERSION_MAX_NAME_SIZE = 64
+# Status codes: CL_SUCCESS is 0 and every other code in this run is negative (-1..-19, then -30..-72; -20..-29 are not defined here).
+const CL_SUCCESS = 0
+
+const CL_DEVICE_NOT_FOUND = -1
+
+const CL_DEVICE_NOT_AVAILABLE = -2
+
+const CL_COMPILER_NOT_AVAILABLE = -3
+
+const CL_MEM_OBJECT_ALLOCATION_FAILURE = -4
+
+const CL_OUT_OF_RESOURCES = -5
+
+const CL_OUT_OF_HOST_MEMORY = -6
+
+const CL_PROFILING_INFO_NOT_AVAILABLE = -7
+
+const CL_MEM_COPY_OVERLAP = -8
+
+const CL_IMAGE_FORMAT_MISMATCH = -9
+
+const CL_IMAGE_FORMAT_NOT_SUPPORTED = -10
+
+const CL_BUILD_PROGRAM_FAILURE = -11
+
+const CL_MAP_FAILURE = -12
+
+const CL_MISALIGNED_SUB_BUFFER_OFFSET = -13
+
+const CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST = -14
+
+const CL_COMPILE_PROGRAM_FAILURE = -15
+
+const CL_LINKER_NOT_AVAILABLE = -16
+
+const CL_LINK_PROGRAM_FAILURE = -17
+
+const CL_DEVICE_PARTITION_FAILED = -18
+
+const CL_KERNEL_ARG_INFO_NOT_AVAILABLE = -19
+
+const CL_INVALID_VALUE = -30
+
+const CL_INVALID_DEVICE_TYPE = -31
+
+const CL_INVALID_PLATFORM = -32
+
+const CL_INVALID_DEVICE = -33
+
+const CL_INVALID_CONTEXT = -34
+
+const CL_INVALID_QUEUE_PROPERTIES = -35
+
+const CL_INVALID_COMMAND_QUEUE = -36
+
+const CL_INVALID_HOST_PTR = -37
+
+const CL_INVALID_MEM_OBJECT = -38
+
+const CL_INVALID_IMAGE_FORMAT_DESCRIPTOR = -39
+
+const CL_INVALID_IMAGE_SIZE = -40
+
+const CL_INVALID_SAMPLER = -41
+
+const CL_INVALID_BINARY = -42
+
+const CL_INVALID_BUILD_OPTIONS = -43
+
+const CL_INVALID_PROGRAM = -44
+
+const CL_INVALID_PROGRAM_EXECUTABLE = -45
+
+const CL_INVALID_KERNEL_NAME = -46
+
+const CL_INVALID_KERNEL_DEFINITION = -47
+
+const CL_INVALID_KERNEL = -48
+
+const CL_INVALID_ARG_INDEX = -49
+
+const CL_INVALID_ARG_VALUE = -50
+
+const CL_INVALID_ARG_SIZE = -51
+
+const CL_INVALID_KERNEL_ARGS = -52
+
+const CL_INVALID_WORK_DIMENSION = -53
+
+const CL_INVALID_WORK_GROUP_SIZE = -54
+
+const CL_INVALID_WORK_ITEM_SIZE = -55
+
+const CL_INVALID_GLOBAL_OFFSET = -56
+
+const CL_INVALID_EVENT_WAIT_LIST = -57
+
+const CL_INVALID_EVENT = -58
+
+const CL_INVALID_OPERATION = -59
+
+const CL_INVALID_GL_OBJECT = -60
+
+const CL_INVALID_BUFFER_SIZE = -61
+
+const CL_INVALID_MIP_LEVEL = -62
+
+const CL_INVALID_GLOBAL_WORK_SIZE = -63
+
+const CL_INVALID_PROPERTY = -64
+
+const CL_INVALID_IMAGE_DESCRIPTOR = -65
+
+const CL_INVALID_COMPILER_OPTIONS = -66
+
+const CL_INVALID_LINKER_OPTIONS = -67
+
+const CL_INVALID_DEVICE_PARTITION_COUNT = -68
+
+const CL_INVALID_PIPE_SIZE = -69
+
+const CL_INVALID_DEVICE_QUEUE = -70
+
+const CL_INVALID_SPEC_ID = -71
+
+const CL_MAX_SIZE_RESTRICTION_EXCEEDED = -72
+# Boolean values; CL_BLOCKING and CL_NON_BLOCKING below are aliases of CL_TRUE and CL_FALSE.
+const CL_FALSE = 0
+
+const CL_TRUE = 1
+
+const CL_BLOCKING = CL_TRUE
+
+const CL_NON_BLOCKING = CL_FALSE
+# Consecutive 0x0900..0x0907 values; presumably the selectors passed as a platform-info param_name -- TODO confirm against the header.
+const CL_PLATFORM_PROFILE = 0x0900
+
+const CL_PLATFORM_VERSION = 0x0901
+
+const CL_PLATFORM_NAME = 0x0902
+
+const CL_PLATFORM_VENDOR = 0x0903
+
+const CL_PLATFORM_EXTENSIONS = 0x0904
+
+const CL_PLATFORM_HOST_TIMER_RESOLUTION = 0x0905
+
+const CL_PLATFORM_NUMERIC_VERSION = 0x0906
+
+const CL_PLATFORM_EXTENSIONS_WITH_VERSION = 0x0907
+# Single-bit values (1 << 0 .. 1 << 4) followed by an all-ones 32-bit mask; note the shifts are Int while the mask is a UInt32 literal.
+const CL_DEVICE_TYPE_DEFAULT = 1 << 0
+
+const CL_DEVICE_TYPE_CPU = 1 << 1
+
+const CL_DEVICE_TYPE_GPU = 1 << 2
+
+const CL_DEVICE_TYPE_ACCELERATOR = 1 << 3
+
+const CL_DEVICE_TYPE_CUSTOM = 1 << 4
+
+const CL_DEVICE_TYPE_ALL = 0xffffffff
+# Start of the consecutive 0x10xx run; presumably the selectors passed as a device-info param_name -- TODO confirm against the header.
+const CL_DEVICE_TYPE = 0x1000
+
+const CL_DEVICE_VENDOR_ID = 0x1001
+
+const CL_DEVICE_MAX_COMPUTE_UNITS = 0x1002
+
+const CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS = 0x1003
+
+const CL_DEVICE_MAX_WORK_GROUP_SIZE = 0x1004
+
+const CL_DEVICE_MAX_WORK_ITEM_SIZES = 0x1005
+
+const CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR = 0x1006
+
+const CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT = 0x1007
+
+const CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT = 0x1008
+ +const CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG = 0x1009 + +const CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT = 0x100a + +const CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE = 0x100b + +const CL_DEVICE_MAX_CLOCK_FREQUENCY = 0x100c + +const CL_DEVICE_ADDRESS_BITS = 0x100d + +const CL_DEVICE_MAX_READ_IMAGE_ARGS = 0x100e + +const CL_DEVICE_MAX_WRITE_IMAGE_ARGS = 0x100f + +const CL_DEVICE_MAX_MEM_ALLOC_SIZE = 0x1010 + +const CL_DEVICE_IMAGE2D_MAX_WIDTH = 0x1011 + +const CL_DEVICE_IMAGE2D_MAX_HEIGHT = 0x1012 + +const CL_DEVICE_IMAGE3D_MAX_WIDTH = 0x1013 + +const CL_DEVICE_IMAGE3D_MAX_HEIGHT = 0x1014 + +const CL_DEVICE_IMAGE3D_MAX_DEPTH = 0x1015 + +const CL_DEVICE_IMAGE_SUPPORT = 0x1016 + +const CL_DEVICE_MAX_PARAMETER_SIZE = 0x1017 + +const CL_DEVICE_MAX_SAMPLERS = 0x1018 + +const CL_DEVICE_MEM_BASE_ADDR_ALIGN = 0x1019 + +const CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE = 0x101a + +const CL_DEVICE_SINGLE_FP_CONFIG = 0x101b + +const CL_DEVICE_GLOBAL_MEM_CACHE_TYPE = 0x101c + +const CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE = 0x101d + +const CL_DEVICE_GLOBAL_MEM_CACHE_SIZE = 0x101e + +const CL_DEVICE_GLOBAL_MEM_SIZE = 0x101f + +const CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE = 0x1020 + +const CL_DEVICE_MAX_CONSTANT_ARGS = 0x1021 + +const CL_DEVICE_LOCAL_MEM_TYPE = 0x1022 + +const CL_DEVICE_LOCAL_MEM_SIZE = 0x1023 + +const CL_DEVICE_ERROR_CORRECTION_SUPPORT = 0x1024 + +const CL_DEVICE_PROFILING_TIMER_RESOLUTION = 0x1025 + +const CL_DEVICE_ENDIAN_LITTLE = 0x1026 + +const CL_DEVICE_AVAILABLE = 0x1027 + +const CL_DEVICE_COMPILER_AVAILABLE = 0x1028 + +const CL_DEVICE_EXECUTION_CAPABILITIES = 0x1029 + +const CL_DEVICE_QUEUE_PROPERTIES = 0x102a + +const CL_DEVICE_QUEUE_ON_HOST_PROPERTIES = 0x102a + +const CL_DEVICE_NAME = 0x102b + +const CL_DEVICE_VENDOR = 0x102c + +const CL_DRIVER_VERSION = 0x102d + +const CL_DEVICE_PROFILE = 0x102e + +const CL_DEVICE_VERSION = 0x102f + +const CL_DEVICE_EXTENSIONS = 0x1030 + +const CL_DEVICE_PLATFORM = 0x1031 + +const CL_DEVICE_DOUBLE_FP_CONFIG = 0x1032 + +const 
CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF = 0x1034 + +const CL_DEVICE_HOST_UNIFIED_MEMORY = 0x1035 + +const CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR = 0x1036 + +const CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT = 0x1037 + +const CL_DEVICE_NATIVE_VECTOR_WIDTH_INT = 0x1038 + +const CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG = 0x1039 + +const CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT = 0x103a + +const CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE = 0x103b + +const CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF = 0x103c + +const CL_DEVICE_OPENCL_C_VERSION = 0x103d + +const CL_DEVICE_LINKER_AVAILABLE = 0x103e + +const CL_DEVICE_BUILT_IN_KERNELS = 0x103f + +const CL_DEVICE_IMAGE_MAX_BUFFER_SIZE = 0x1040 + +const CL_DEVICE_IMAGE_MAX_ARRAY_SIZE = 0x1041 + +const CL_DEVICE_PARENT_DEVICE = 0x1042 + +const CL_DEVICE_PARTITION_MAX_SUB_DEVICES = 0x1043 + +const CL_DEVICE_PARTITION_PROPERTIES = 0x1044 + +const CL_DEVICE_PARTITION_AFFINITY_DOMAIN = 0x1045 + +const CL_DEVICE_PARTITION_TYPE = 0x1046 + +const CL_DEVICE_REFERENCE_COUNT = 0x1047 + +const CL_DEVICE_PREFERRED_INTEROP_USER_SYNC = 0x1048 + +const CL_DEVICE_PRINTF_BUFFER_SIZE = 0x1049 + +const CL_DEVICE_IMAGE_PITCH_ALIGNMENT = 0x104a + +const CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT = 0x104b + +const CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS = 0x104c + +const CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE = 0x104d + +const CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES = 0x104e + +const CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE = 0x104f + +const CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE = 0x1050 + +const CL_DEVICE_MAX_ON_DEVICE_QUEUES = 0x1051 + +const CL_DEVICE_MAX_ON_DEVICE_EVENTS = 0x1052 + +const CL_DEVICE_SVM_CAPABILITIES = 0x1053 + +const CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE = 0x1054 + +const CL_DEVICE_MAX_PIPE_ARGS = 0x1055 + +const CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS = 0x1056 + +const CL_DEVICE_PIPE_MAX_PACKET_SIZE = 0x1057 + +const CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT = 0x1058 + +const CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT = 0x1059 + +const 
CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT = 0x105a + +const CL_DEVICE_IL_VERSION = 0x105b + +const CL_DEVICE_MAX_NUM_SUB_GROUPS = 0x105c + +const CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS = 0x105d + +const CL_DEVICE_NUMERIC_VERSION = 0x105e + +const CL_DEVICE_EXTENSIONS_WITH_VERSION = 0x1060 + +const CL_DEVICE_ILS_WITH_VERSION = 0x1061 + +const CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION = 0x1062 + +const CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES = 0x1063 + +const CL_DEVICE_ATOMIC_FENCE_CAPABILITIES = 0x1064 + +const CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT = 0x1065 + +const CL_DEVICE_OPENCL_C_ALL_VERSIONS = 0x1066 + +const CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_MULTIPLE = 0x1067 + +const CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT = 0x1068 + +const CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT = 0x1069 + +const CL_DEVICE_OPENCL_C_FEATURES = 0x106f + +const CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES = 0x1070 + +const CL_DEVICE_PIPE_SUPPORT = 0x1071 + +const CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED = 0x1072 + +const CL_FP_DENORM = 1 << 0 + +const CL_FP_INF_NAN = 1 << 1 + +const CL_FP_ROUND_TO_NEAREST = 1 << 2 + +const CL_FP_ROUND_TO_ZERO = 1 << 3 + +const CL_FP_ROUND_TO_INF = 1 << 4 + +const CL_FP_FMA = 1 << 5 + +const CL_FP_SOFT_FLOAT = 1 << 6 + +const CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT = 1 << 7 + +const CL_NONE = 0x00 + +const CL_READ_ONLY_CACHE = 0x01 + +const CL_READ_WRITE_CACHE = 0x02 + +const CL_LOCAL = 0x01 + +const CL_GLOBAL = 0x02 + +const CL_EXEC_KERNEL = 1 << 0 + +const CL_EXEC_NATIVE_KERNEL = 1 << 1 + +const CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE = 1 << 0 + +const CL_QUEUE_PROFILING_ENABLE = 1 << 1 + +const CL_QUEUE_ON_DEVICE = 1 << 2 + +const CL_QUEUE_ON_DEVICE_DEFAULT = 1 << 3 + +const CL_CONTEXT_REFERENCE_COUNT = 0x1080 + +const CL_CONTEXT_DEVICES = 0x1081 + +const CL_CONTEXT_PROPERTIES = 0x1082 + +const CL_CONTEXT_NUM_DEVICES = 0x1083 + +const CL_CONTEXT_PLATFORM = 0x1084 + +const CL_CONTEXT_INTEROP_USER_SYNC = 0x1085 + +const CL_DEVICE_PARTITION_EQUALLY 
= 0x1086 + +const CL_DEVICE_PARTITION_BY_COUNTS = 0x1087 + +const CL_DEVICE_PARTITION_BY_COUNTS_LIST_END = 0x00 + +const CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN = 0x1088 + +const CL_DEVICE_AFFINITY_DOMAIN_NUMA = 1 << 0 + +const CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE = 1 << 1 + +const CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE = 1 << 2 + +const CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE = 1 << 3 + +const CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE = 1 << 4 + +const CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE = 1 << 5 + +const CL_DEVICE_SVM_COARSE_GRAIN_BUFFER = 1 << 0 + +const CL_DEVICE_SVM_FINE_GRAIN_BUFFER = 1 << 1 + +const CL_DEVICE_SVM_FINE_GRAIN_SYSTEM = 1 << 2 + +const CL_DEVICE_SVM_ATOMICS = 1 << 3 + +const CL_QUEUE_CONTEXT = 0x1090 + +const CL_QUEUE_DEVICE = 0x1091 + +const CL_QUEUE_REFERENCE_COUNT = 0x1092 + +const CL_QUEUE_PROPERTIES = 0x1093 + +const CL_QUEUE_SIZE = 0x1094 + +const CL_QUEUE_DEVICE_DEFAULT = 0x1095 + +const CL_QUEUE_PROPERTIES_ARRAY = 0x1098 + +const CL_MEM_READ_WRITE = 1 << 0 + +const CL_MEM_WRITE_ONLY = 1 << 1 + +const CL_MEM_READ_ONLY = 1 << 2 + +const CL_MEM_USE_HOST_PTR = 1 << 3 + +const CL_MEM_ALLOC_HOST_PTR = 1 << 4 + +const CL_MEM_COPY_HOST_PTR = 1 << 5 + +const CL_MEM_HOST_WRITE_ONLY = 1 << 7 + +const CL_MEM_HOST_READ_ONLY = 1 << 8 + +const CL_MEM_HOST_NO_ACCESS = 1 << 9 + +const CL_MEM_SVM_FINE_GRAIN_BUFFER = 1 << 10 + +const CL_MEM_SVM_ATOMICS = 1 << 11 + +const CL_MEM_KERNEL_READ_AND_WRITE = 1 << 12 + +const CL_MIGRATE_MEM_OBJECT_HOST = 1 << 0 + +const CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED = 1 << 1 + +const CL_R = 0x10b0 + +const CL_A = 0x10b1 + +const CL_RG = 0x10b2 + +const CL_RA = 0x10b3 + +const CL_RGB = 0x10b4 + +const CL_RGBA = 0x10b5 + +const CL_BGRA = 0x10b6 + +const CL_ARGB = 0x10b7 + +const CL_INTENSITY = 0x10b8 + +const CL_LUMINANCE = 0x10b9 + +const CL_Rx = 0x10ba + +const CL_RGx = 0x10bb + +const CL_RGBx = 0x10bc + +const CL_DEPTH = 0x10bd + +const CL_sRGB = 0x10bf + +const CL_sRGBx = 0x10c0 + +const CL_sRGBA = 0x10c1 + +const CL_sBGRA = 
0x10c2 + +const CL_ABGR = 0x10c3 + +const CL_SNORM_INT8 = 0x10d0 + +const CL_SNORM_INT16 = 0x10d1 + +const CL_UNORM_INT8 = 0x10d2 + +const CL_UNORM_INT16 = 0x10d3 + +const CL_UNORM_SHORT_565 = 0x10d4 + +const CL_UNORM_SHORT_555 = 0x10d5 + +const CL_UNORM_INT_101010 = 0x10d6 + +const CL_SIGNED_INT8 = 0x10d7 + +const CL_SIGNED_INT16 = 0x10d8 + +const CL_SIGNED_INT32 = 0x10d9 + +const CL_UNSIGNED_INT8 = 0x10da + +const CL_UNSIGNED_INT16 = 0x10db + +const CL_UNSIGNED_INT32 = 0x10dc + +const CL_HALF_FLOAT = 0x10dd + +const CL_FLOAT = 0x10de + +const CL_UNORM_INT_101010_2 = 0x10e0 + +const CL_MEM_OBJECT_BUFFER = 0x10f0 + +const CL_MEM_OBJECT_IMAGE2D = 0x10f1 + +const CL_MEM_OBJECT_IMAGE3D = 0x10f2 + +const CL_MEM_OBJECT_IMAGE2D_ARRAY = 0x10f3 + +const CL_MEM_OBJECT_IMAGE1D = 0x10f4 + +const CL_MEM_OBJECT_IMAGE1D_ARRAY = 0x10f5 + +const CL_MEM_OBJECT_IMAGE1D_BUFFER = 0x10f6 + +const CL_MEM_OBJECT_PIPE = 0x10f7 + +const CL_MEM_TYPE = 0x1100 + +const CL_MEM_FLAGS = 0x1101 + +const CL_MEM_SIZE = 0x1102 + +const CL_MEM_HOST_PTR = 0x1103 + +const CL_MEM_MAP_COUNT = 0x1104 + +const CL_MEM_REFERENCE_COUNT = 0x1105 + +const CL_MEM_CONTEXT = 0x1106 + +const CL_MEM_ASSOCIATED_MEMOBJECT = 0x1107 + +const CL_MEM_OFFSET = 0x1108 + +const CL_MEM_USES_SVM_POINTER = 0x1109 + +const CL_MEM_PROPERTIES = 0x110a + +const CL_IMAGE_FORMAT = 0x1110 + +const CL_IMAGE_ELEMENT_SIZE = 0x1111 + +const CL_IMAGE_ROW_PITCH = 0x1112 + +const CL_IMAGE_SLICE_PITCH = 0x1113 + +const CL_IMAGE_WIDTH = 0x1114 + +const CL_IMAGE_HEIGHT = 0x1115 + +const CL_IMAGE_DEPTH = 0x1116 + +const CL_IMAGE_ARRAY_SIZE = 0x1117 + +const CL_IMAGE_BUFFER = 0x1118 + +const CL_IMAGE_NUM_MIP_LEVELS = 0x1119 + +const CL_IMAGE_NUM_SAMPLES = 0x111a + +const CL_PIPE_PACKET_SIZE = 0x1120 + +const CL_PIPE_MAX_PACKETS = 0x1121 + +const CL_PIPE_PROPERTIES = 0x1122 + +const CL_ADDRESS_NONE = 0x1130 + +const CL_ADDRESS_CLAMP_TO_EDGE = 0x1131 + +const CL_ADDRESS_CLAMP = 0x1132 + +const CL_ADDRESS_REPEAT = 0x1133 + +const 
CL_ADDRESS_MIRRORED_REPEAT = 0x1134 + +const CL_FILTER_NEAREST = 0x1140 + +const CL_FILTER_LINEAR = 0x1141 + +const CL_SAMPLER_REFERENCE_COUNT = 0x1150 + +const CL_SAMPLER_CONTEXT = 0x1151 + +const CL_SAMPLER_NORMALIZED_COORDS = 0x1152 + +const CL_SAMPLER_ADDRESSING_MODE = 0x1153 + +const CL_SAMPLER_FILTER_MODE = 0x1154 + +const CL_SAMPLER_MIP_FILTER_MODE = 0x1155 + +const CL_SAMPLER_LOD_MIN = 0x1156 + +const CL_SAMPLER_LOD_MAX = 0x1157 + +const CL_SAMPLER_PROPERTIES = 0x1158 + +const CL_MAP_READ = 1 << 0 + +const CL_MAP_WRITE = 1 << 1 + +const CL_MAP_WRITE_INVALIDATE_REGION = 1 << 2 + +const CL_PROGRAM_REFERENCE_COUNT = 0x1160 + +const CL_PROGRAM_CONTEXT = 0x1161 + +const CL_PROGRAM_NUM_DEVICES = 0x1162 + +const CL_PROGRAM_DEVICES = 0x1163 + +const CL_PROGRAM_SOURCE = 0x1164 + +const CL_PROGRAM_BINARY_SIZES = 0x1165 + +const CL_PROGRAM_BINARIES = 0x1166 + +const CL_PROGRAM_NUM_KERNELS = 0x1167 + +const CL_PROGRAM_KERNEL_NAMES = 0x1168 + +const CL_PROGRAM_IL = 0x1169 + +const CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT = 0x116a + +const CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT = 0x116b + +const CL_PROGRAM_BUILD_STATUS = 0x1181 + +const CL_PROGRAM_BUILD_OPTIONS = 0x1182 + +const CL_PROGRAM_BUILD_LOG = 0x1183 + +const CL_PROGRAM_BINARY_TYPE = 0x1184 + +const CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE = 0x1185 + +const CL_PROGRAM_BINARY_TYPE_NONE = 0x00 + +const CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT = 0x01 + +const CL_PROGRAM_BINARY_TYPE_LIBRARY = 0x02 + +const CL_PROGRAM_BINARY_TYPE_EXECUTABLE = 0x04 + +const CL_BUILD_SUCCESS = 0 + +const CL_BUILD_NONE = -1 + +const CL_BUILD_ERROR = -2 + +const CL_BUILD_IN_PROGRESS = -3 + +const CL_KERNEL_FUNCTION_NAME = 0x1190 + +const CL_KERNEL_NUM_ARGS = 0x1191 + +const CL_KERNEL_REFERENCE_COUNT = 0x1192 + +const CL_KERNEL_CONTEXT = 0x1193 + +const CL_KERNEL_PROGRAM = 0x1194 + +const CL_KERNEL_ATTRIBUTES = 0x1195 + +const CL_KERNEL_ARG_ADDRESS_QUALIFIER = 0x1196 + +const CL_KERNEL_ARG_ACCESS_QUALIFIER = 0x1197 + +const 
CL_KERNEL_ARG_TYPE_NAME = 0x1198 + +const CL_KERNEL_ARG_TYPE_QUALIFIER = 0x1199 + +const CL_KERNEL_ARG_NAME = 0x119a + +const CL_KERNEL_ARG_ADDRESS_GLOBAL = 0x119b + +const CL_KERNEL_ARG_ADDRESS_LOCAL = 0x119c + +const CL_KERNEL_ARG_ADDRESS_CONSTANT = 0x119d + +const CL_KERNEL_ARG_ADDRESS_PRIVATE = 0x119e + +const CL_KERNEL_ARG_ACCESS_READ_ONLY = 0x11a0 + +const CL_KERNEL_ARG_ACCESS_WRITE_ONLY = 0x11a1 + +const CL_KERNEL_ARG_ACCESS_READ_WRITE = 0x11a2 + +const CL_KERNEL_ARG_ACCESS_NONE = 0x11a3 + +const CL_KERNEL_ARG_TYPE_NONE = 0 + +const CL_KERNEL_ARG_TYPE_CONST = 1 << 0 + +const CL_KERNEL_ARG_TYPE_RESTRICT = 1 << 1 + +const CL_KERNEL_ARG_TYPE_VOLATILE = 1 << 2 + +const CL_KERNEL_ARG_TYPE_PIPE = 1 << 3 + +const CL_KERNEL_WORK_GROUP_SIZE = 0x11b0 + +const CL_KERNEL_COMPILE_WORK_GROUP_SIZE = 0x11b1 + +const CL_KERNEL_LOCAL_MEM_SIZE = 0x11b2 + +const CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE = 0x11b3 + +const CL_KERNEL_PRIVATE_MEM_SIZE = 0x11b4 + +const CL_KERNEL_GLOBAL_WORK_SIZE = 0x11b5 + +const CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE = 0x2033 + +const CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE = 0x2034 + +const CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT = 0x11b8 + +const CL_KERNEL_MAX_NUM_SUB_GROUPS = 0x11b9 + +const CL_KERNEL_COMPILE_NUM_SUB_GROUPS = 0x11ba + +const CL_KERNEL_EXEC_INFO_SVM_PTRS = 0x11b6 + +const CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM = 0x11b7 + +const CL_EVENT_COMMAND_QUEUE = 0x11d0 + +const CL_EVENT_COMMAND_TYPE = 0x11d1 + +const CL_EVENT_REFERENCE_COUNT = 0x11d2 + +const CL_EVENT_COMMAND_EXECUTION_STATUS = 0x11d3 + +const CL_EVENT_CONTEXT = 0x11d4 + +const CL_COMMAND_NDRANGE_KERNEL = 0x11f0 + +const CL_COMMAND_TASK = 0x11f1 + +const CL_COMMAND_NATIVE_KERNEL = 0x11f2 + +const CL_COMMAND_READ_BUFFER = 0x11f3 + +const CL_COMMAND_WRITE_BUFFER = 0x11f4 + +const CL_COMMAND_COPY_BUFFER = 0x11f5 + +const CL_COMMAND_READ_IMAGE = 0x11f6 + +const CL_COMMAND_WRITE_IMAGE = 0x11f7 + +const CL_COMMAND_COPY_IMAGE = 0x11f8 + +const 
CL_COMMAND_COPY_IMAGE_TO_BUFFER = 0x11f9 + +const CL_COMMAND_COPY_BUFFER_TO_IMAGE = 0x11fa + +const CL_COMMAND_MAP_BUFFER = 0x11fb + +const CL_COMMAND_MAP_IMAGE = 0x11fc + +const CL_COMMAND_UNMAP_MEM_OBJECT = 0x11fd + +const CL_COMMAND_MARKER = 0x11fe + +const CL_COMMAND_ACQUIRE_GL_OBJECTS = 0x11ff + +const CL_COMMAND_RELEASE_GL_OBJECTS = 0x1200 + +const CL_COMMAND_READ_BUFFER_RECT = 0x1201 + +const CL_COMMAND_WRITE_BUFFER_RECT = 0x1202 + +const CL_COMMAND_COPY_BUFFER_RECT = 0x1203 + +const CL_COMMAND_USER = 0x1204 + +const CL_COMMAND_BARRIER = 0x1205 + +const CL_COMMAND_MIGRATE_MEM_OBJECTS = 0x1206 + +const CL_COMMAND_FILL_BUFFER = 0x1207 + +const CL_COMMAND_FILL_IMAGE = 0x1208 + +const CL_COMMAND_SVM_FREE = 0x1209 + +const CL_COMMAND_SVM_MEMCPY = 0x120a + +const CL_COMMAND_SVM_MEMFILL = 0x120b + +const CL_COMMAND_SVM_MAP = 0x120c + +const CL_COMMAND_SVM_UNMAP = 0x120d + +const CL_COMMAND_SVM_MIGRATE_MEM = 0x120e + +const CL_COMPLETE = 0x00 + +const CL_RUNNING = 0x01 + +const CL_SUBMITTED = 0x02 + +const CL_QUEUED = 0x03 + +const CL_BUFFER_CREATE_TYPE_REGION = 0x1220 + +const CL_PROFILING_COMMAND_QUEUED = 0x1280 + +const CL_PROFILING_COMMAND_SUBMIT = 0x1281 + +const CL_PROFILING_COMMAND_START = 0x1282 + +const CL_PROFILING_COMMAND_END = 0x1283 + +const CL_PROFILING_COMMAND_COMPLETE = 0x1284 + +const CL_DEVICE_ATOMIC_ORDER_RELAXED = 1 << 0 + +const CL_DEVICE_ATOMIC_ORDER_ACQ_REL = 1 << 1 + +const CL_DEVICE_ATOMIC_ORDER_SEQ_CST = 1 << 2 + +const CL_DEVICE_ATOMIC_SCOPE_WORK_ITEM = 1 << 3 + +const CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP = 1 << 4 + +const CL_DEVICE_ATOMIC_SCOPE_DEVICE = 1 << 5 + +const CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES = 1 << 6 + +const CL_DEVICE_QUEUE_SUPPORTED = 1 << 0 + +const CL_DEVICE_QUEUE_REPLACEABLE_DEFAULT = 1 << 1 + +const CL_KHRONOS_VENDOR_ID_CODEPLAY = 0x00010004 + +const CL_VERSION_MAJOR_BITS = 10 + +const CL_VERSION_MINOR_BITS = 10 + +const CL_VERSION_PATCH_BITS = 12 + +const CL_VERSION_MAJOR_MASK = 1 << CL_VERSION_MAJOR_BITS - 1 + +const 
CL_VERSION_MINOR_MASK = 1 << CL_VERSION_MINOR_BITS - 1 + +const CL_VERSION_PATCH_MASK = 1 << CL_VERSION_PATCH_BITS - 1 + +const cl_khr_gl_sharing = 1 + +const CL_KHR_GL_SHARING_EXTENSION_NAME = "cl_khr_gl_sharing" + +const CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR = -1000 + +const CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR = 0x2006 + +const CL_DEVICES_FOR_GL_CONTEXT_KHR = 0x2007 + +const CL_GL_CONTEXT_KHR = 0x2008 + +const CL_EGL_DISPLAY_KHR = 0x2009 + +const CL_GLX_DISPLAY_KHR = 0x200a + +const CL_WGL_HDC_KHR = 0x200b + +const CL_CGL_SHAREGROUP_KHR = 0x200c + +const CL_GL_OBJECT_BUFFER = 0x2000 + +const CL_GL_OBJECT_TEXTURE2D = 0x2001 + +const CL_GL_OBJECT_TEXTURE3D = 0x2002 + +const CL_GL_OBJECT_RENDERBUFFER = 0x2003 + +const CL_GL_OBJECT_TEXTURE2D_ARRAY = 0x200e + +const CL_GL_OBJECT_TEXTURE1D = 0x200f + +const CL_GL_OBJECT_TEXTURE1D_ARRAY = 0x2010 + +const CL_GL_OBJECT_TEXTURE_BUFFER = 0x2011 + +const CL_GL_TEXTURE_TARGET = 0x2004 + +const CL_GL_MIPMAP_LEVEL = 0x2005 + +const cl_khr_gl_event = 1 + +const CL_KHR_GL_EVENT_EXTENSION_NAME = "cl_khr_gl_event" + +const CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR = 0x200d + +const cl_khr_gl_depth_images = 1 + +const CL_KHR_GL_DEPTH_IMAGES_EXTENSION_NAME = "cl_khr_gl_depth_images" + +const CL_DEPTH_STENCIL = 0x10be + +const CL_UNORM_INT24 = 0x10df + +const cl_khr_gl_msaa_sharing = 1 + +const CL_KHR_GL_MSAA_SHARING_EXTENSION_NAME = "cl_khr_gl_msaa_sharing" + +const CL_GL_NUM_SAMPLES = 0x2012 + +const cl_intel_sharing_format_query_gl = 1 + +const CL_INTEL_SHARING_FORMAT_QUERY_GL_EXTENSION_NAME = "cl_intel_sharing_format_query_gl" diff --git a/res/Project.toml b/res/Project.toml new file mode 100644 index 00000000..6c6d0bb1 --- /dev/null +++ b/res/Project.toml @@ -0,0 +1,4 @@ +[deps] +Clang = "40e3b903-d033-50b4-a0cc-940c62c95e31" +JuliaFormatter = "98e50ef6-434e-11e9-1051-2b60c6c9e899" +OpenCL_Headers_jll = "a7aa756b-2b7f-562a-9e9d-e94076c5c8ee" diff --git a/res/opencl.toml b/res/opencl.toml new file mode 100644 index 00000000..a694476c 
--- /dev/null +++ b/res/opencl.toml @@ -0,0 +1,12 @@ +[general] +library_name = "libopencl" +output_file_path = "../lib/libopencl.jl" +prologue_file_path = "./opencl_prologue.jl" +print_using_CEnum = false + + +[codegen] +use_ccall_macro = true + +[api] +checked_rettypes = [ "cl_int" ] diff --git a/res/opencl_prologue.jl b/res/opencl_prologue.jl new file mode 100644 index 00000000..33f8536e --- /dev/null +++ b/res/opencl_prologue.jl @@ -0,0 +1,18 @@ +# outlined functionality to avoid GC frame allocation +@noinline function throw_api_error(res) + throw(CLError(res)) +end + +function check(f) + res = retry_reclaim(err -> err == CL_OUT_OF_RESOURCES || + err == CL_MEM_OBJECT_ALLOCATION_FAILURE || + err == CL_OUT_OF_HOST_MEMORY) do + f() + end + + if res != CL_SUCCESS + throw_api_error(res) + end + + return +end diff --git a/res/wrap.jl b/res/wrap.jl new file mode 100644 index 00000000..1bb42f83 --- /dev/null +++ b/res/wrap.jl @@ -0,0 +1,110 @@ +# script to parse OpenCL headers and generate Julia wrappers + + +# +# Parsing +# + +using Clang +using Clang.Generators + +using JuliaFormatter + +function wrap(name, headers...; defines=[], include_dirs=[], dependents=true) + @info "Wrapping $name" + + args = get_default_args() + for define in defines + if isa(define, Pair) + append!(args, ["-D", "$(first(define))=$(last(define))"]) + else + append!(args, ["-D", "$define"]) + end + end + for include_dir in include_dirs + push!(args, "-isystem$include_dir") + end + + options = load_options(joinpath(@__DIR__, "$(name).toml")) + + # create context + ctx = create_context([headers...], args, options) + + # run generator + build!(ctx, BUILDSTAGE_NO_PRINTING) + + # if requested, only wrap stuff from the list of headers + # (i.e., not from included ones) + if !dependents + function rewrite!(dag::ExprDAG) + replace!(get_nodes(dag)) do node + path = normpath(Clang.get_filename(node.cursor)) + if !in(path, headers) + return ExprNode(node.id, Generators.Skip(), node.cursor, Expr[], 
node.adj) + end + return node + end + end + rewrite!(ctx.dag) + end + + rewriter!(ctx, options) + + build!(ctx, BUILDSTAGE_PRINTING_ONLY) + + format_file(options["general"]["output_file_path"], YASStyle()) + + return +end + +function rewriter!(ctx, options) + for node in get_nodes(ctx.dag) + if Generators.is_function(node) && !Generators.is_variadic_function(node) + expr = node.exprs[1] + call_expr = expr.args[2].args[1].args[3] # assumes `@ccall` + + target_expr = call_expr.args[1].args[1] + fn = String(target_expr.args[2].value) + + # rewrite pointer argument types + arg_exprs = call_expr.args[1].args[2:end] + if haskey(options, "api") && haskey(options["api"], fn) + argtypes = get(options["api"][fn], "argtypes", Dict()) + for (arg, typ) in argtypes + i = parse(Int, arg) + arg_exprs[i].args[2] = Meta.parse(typ) + end + end + + # insert `@checked` before each function with a `ccall` returning a checked type + rettyp = call_expr.args[2] + checked_types = if haskey(options, "api") + get(options["api"], "checked_rettypes", String[]) + else + String[] + end + if rettyp isa Symbol && String(rettyp) in checked_types + node.exprs[1] = Expr(:macrocall, Symbol("@checked"), nothing, expr) + end + end + end +end + + +# +# Main application +# + +using OpenCL_Headers_jll + +function main() + headers = ["cl.h", "cl_gl.h"] + include_dir = joinpath(OpenCL_Headers_jll.artifact_dir, "include") + paths = map(headers) do header + joinpath(include_dir, "CL", header) + end + wrap("opencl", paths...; include_dirs=[include_dir], + defines=["CL_TARGET_OPENCL_VERSION" => "300"],) +end + +isinteractive() || main() diff --git a/src/OpenCL.jl b/src/OpenCL.jl index 3925b029..a6c1e838 100644 --- a/src/OpenCL.jl +++ b/src/OpenCL.jl @@ -6,21 +6,15 @@ module cl abstract type CLObject end Base.hash(x::CLObject) = hash(pointer(x)) -Base.isequal(x :: T, y :: T) where {T <: CLObject} = Base.hash(x) == Base.hash(y) -Base.:(==)(x :: T, y :: T) where {T <: CLObject} = Base.hash(x) == Base.hash(y) - -#
OpenCL Types -include("types.jl") +Base.isequal(x::T, y::T) where {T <: CLObject} = Base.hash(x) == Base.hash(y) +Base.:(==)(x::T, y::T) where {T <: CLObject} = Base.hash(x) == Base.hash(y) # The arrays contain a nullbyte that we pop first -function CLString(v :: Array{CL_char}) +function CLString(v::Array{Cchar}) pop!(v) String(reinterpret(UInt8, v)) end -# OpenCL Constants -include("constants.jl") - # OpenCL low level api include("api.jl") @@ -67,7 +61,7 @@ include("array.jl") end # cl function __init__() - if cl.api.libopencl == "" + if cl.libopencl == "" @warn "Could not locate an OpenCL library\nOpenCL API calls will be unavailable" end end diff --git a/src/api.jl b/src/api.jl index 623e5916..f045422c 100644 --- a/src/api.jl +++ b/src/api.jl @@ -1,43 +1,94 @@ -module api - -include("types.jl") - import OpenCL_jll const libopencl = OpenCL_jll.libopencl -function _ocl_func(func, ret_type, arg_types) - local args_in = Symbol[Symbol("arg$i") - for (i, T) in enumerate(arg_types.args)] +""" + @checked function foo(...) + rv = ... + return rv + end + +Macro for wrapping a function definition returning a status code. Two versions of the +function will be generated: `foo`, with the function body wrapped by an invocation of the +`check` function (to be implemented by the caller of this macro), and `unchecked_foo` where no +such invocation is present and the status code is returned to the caller. 
+""" +macro checked(ex) + # parse the function definition + @assert Meta.isexpr(ex, :function) + sig = ex.args[1] + @assert Meta.isexpr(sig, :call) + body = ex.args[2] + @assert Meta.isexpr(body, :block) - esc(quote - function $func($(args_in...)) - ccall(($(string(func)), libopencl), - $ret_type, - $arg_types, - $(args_in...)) + # we need to detect the first API call, so add an initialization check + body = quote + if !initialized[] + initialize() end - end) -end + $body + end -macro ocl_func(func, ret_type, arg_types) - _ocl_func(func, ret_type, arg_types) + # generate a "safe" version that performs a check + safe_body = quote + check() do + $body + end + end + safe_sig = Expr(:call, sig.args[1], sig.args[2:end]...) + safe_def = Expr(:function, safe_sig, safe_body) + + # generate an "unchecked" version that returns the error code instead + unchecked_sig = Expr(:call, Symbol("unchecked_", sig.args[1]), sig.args[2:end]...) + unchecked_def = Expr(:function, unchecked_sig, body) + + return esc(:($safe_def, $unchecked_def)) end -const CL_callback = Ptr{Nothing} +function retry_reclaim(f, isfailed) + ret = f() -abstract type CL_user_data_tag end -const CL_user_data = Ptr{CL_user_data_tag} + # slow path, incrementally reclaiming more memory until we succeed + if isfailed(ret) + phase = 1 + while true + if phase == 1 + GC.gc(false) + elseif phase == 2 + GC.gc(true) + else + break + end + phase += 1 -Base.cconvert(::Type{Ptr{CL_user_data_tag}}, obj::T) where {T} = Ref{T}(obj) + ret = f() + isfailed(ret) || break + end + end + + ret +end + +include("../lib/libopencl.jl") + +# lazy initialization +const initialized = Ref{Bool}(false) +@noinline function initialize() + initialized[] = true -Base.unsafe_convert(P::Type{Ptr{CL_user_data_tag}}, ptr::Ref) = P(Base.unsafe_convert(Ptr{Cvoid}, ptr)) -Base.unsafe_convert(P::Type{Ptr{CL_user_data_tag}}, ptr::Ptr) = P(Base.unsafe_convert(Ptr{Cvoid}, ptr)) + if isempty(OpenCL_jll.drivers) + @warn """No OpenCL driver JLLs were detected
at the time of the first call into OpenCL.jl. + Only system drivers will be available.""" + return + end -include("api/opencl_1.0.0.jl") -include("api/opencl_1.1.0.jl") -include("api/opencl_1.2.0.jl") -include("api/opencl_2.0.0.jl") + withenv("OCL_ICD_FILENAMES"=>join(OpenCL_jll.drivers, ':')) do + num_platforms = Ref{Cuint}() + @ccall libopencl.clGetPlatformIDs( + 0::cl_uint, C_NULL::Ptr{cl_platform_id}, + num_platforms::Ptr{cl_uint})::cl_int + end +end function parse_version(version_string) mg = match(r"^OpenCL ([0-9]+)\.([0-9]+) .*$", version_string) @@ -47,5 +98,3 @@ function parse_version(version_string) return VersionNumber(parse(Int, mg.captures[1]), parse(Int, mg.captures[2])) end - -end diff --git a/src/api/opencl_1.0.0.jl b/src/api/opencl_1.0.0.jl deleted file mode 100644 index 2169bb1f..00000000 --- a/src/api/opencl_1.0.0.jl +++ /dev/null @@ -1,210 +0,0 @@ -#=== platform apis ===# -@ocl_func(clGetPlatformIDs, CL_int, - (CL_uint, Ptr{CL_platform_id}, Ptr{CL_uint})) - -@ocl_func(clGetPlatformInfo, - CL_int, (CL_platform_id, CL_platform_info, Csize_t, Ptr{Nothing}, Ptr{Csize_t})) - -#=== device apis ===# -@ocl_func(clGetDeviceIDs, CL_int, - (CL_platform_id, CL_device_type, CL_uint, Ptr{CL_device_id}, Ptr{CL_uint})) - -@ocl_func(clGetDeviceInfo, CL_int, - (CL_device_id, CL_device_info, Csize_t, Ptr{Nothing}, Ptr{Csize_t})) - -#=== context apis ===# -#TODO: pass user data as Any type -@ocl_func(clCreateContext, CL_context, - (Ptr{CL_context_properties}, CL_uint, Ptr{CL_device_id}, CL_callback, CL_callback, Ptr{CL_int})) - -@ocl_func(clCreateContextFromType, CL_context, - (Ptr{CL_context_properties}, CL_device_type, CL_callback, CL_callback, Ptr{CL_int})) - -@ocl_func(clRetainContext, CL_int, (CL_context,)) - -@ocl_func(clReleaseContext, CL_int, (CL_context,)) - -@ocl_func(clGetContextInfo, CL_int, - (CL_context, CL_context_info, Csize_t, Ptr{Nothing}, Ptr{Csize_t})) - -#=== command queue apis ===# -@ocl_func(clCreateCommandQueue, CL_command_queue, - 
(CL_context, CL_device_id, CL_command_queue_properties, Ptr{CL_int})) - -@ocl_func(clRetainCommandQueue, CL_int, (CL_command_queue,)) - -@ocl_func(clReleaseCommandQueue, CL_int, (CL_command_queue,)) - -@ocl_func(clGetCommandQueueInfo, CL_int, - (CL_command_queue, CL_command_queue_info, Csize_t, Ptr{Nothing}, Ptr{Csize_t})) - -#=== memory object apis ===# -@ocl_func(clCreateBuffer, CL_mem, - (CL_context, CL_mem_flags, Csize_t, Ptr{Nothing}, Ptr{CL_int})) - -@ocl_func(clRetainMemObject, CL_int, (CL_mem,)) - -@ocl_func(clReleaseMemObject, CL_int, (CL_mem,)) - -@ocl_func(clGetSupportedImageFormats, CL_int, - (CL_context, CL_mem_flags, CL_mem_object_type, CL_uint, Ptr{CL_image_format}, Ptr{CL_uint})) - -@ocl_func(clGetMemObjectInfo, CL_mem, - (CL_mem, CL_mem_info, Csize_t, Ptr{Nothing}, Ptr{Csize_t})) - -@ocl_func(clGetImageInfo, CL_mem, - (CL_mem, CL_image_info, Csize_t, Ptr{Nothing}, Ptr{Csize_t})) - -#=== sampler apis ===# -@ocl_func(clCreateSampler, CL_sampler, - (CL_context, CL_bool, CL_addressing_mode, CL_filter_mode, Ptr{CL_int})) - -@ocl_func(clRetainSampler, CL_int, (CL_sampler,)) - -@ocl_func(clReleaseSampler, CL_int, (CL_sampler,)) - -@ocl_func(clGetSamplerInfo, CL_int, - (CL_sampler, CL_sampler_info, Csize_t, Ptr{Nothing}, Ptr{Csize_t})) - -#=== program object apis ===# -@ocl_func(clCreateProgramWithSource, CL_program, - (CL_context, CL_uint, Ptr{Ptr{Cchar}}, Ptr{Csize_t}, Ptr{CL_int})) - -@ocl_func(clCreateProgramWithBinary, CL_program, - (CL_context, CL_uint, Ptr{CL_device_id}, Ptr{Csize_t}, - Ptr{Ptr{Cuchar}}, Ptr{CL_int}, Ptr{CL_int})) - -@ocl_func(clRetainProgram, CL_int, (CL_program,)) - -@ocl_func(clReleaseProgram, CL_int, (CL_program,)) - -@ocl_func(clBuildProgram, CL_int, - (CL_program, CL_uint, Ptr{CL_device_id}, Ptr{Cchar}, CL_callback, Ptr{Nothing})) - -@ocl_func(clGetProgramBuildInfo, CL_int, - (CL_program, CL_device_id, CL_program_build_info, Csize_t, Ptr{Nothing}, Ptr{Csize_t})) - -#=== kernel object apis ===# -@ocl_func(clCreateKernel, 
CL_kernel, - (CL_program, Ptr{Cchar}, Ptr{CL_int})) - -@ocl_func(clCreateKernelsInProgram, CL_int, - (CL_program, CL_uint, Ptr{CL_kernel}, Ptr{CL_uint})) - -@ocl_func(clRetainKernel, CL_int, (CL_kernel,)) - -@ocl_func(clReleaseKernel, CL_int, (CL_kernel,)) - -@ocl_func(clSetKernelArg, CL_int, - (CL_kernel, CL_uint, Csize_t, Ptr{Nothing})) - -@ocl_func(clGetKernelInfo, CL_int, - (CL_kernel, CL_kernel_info, Csize_t, Ptr{Nothing}, Ptr{Csize_t})) - -@ocl_func(clGetKernelWorkGroupInfo, CL_int, - (CL_kernel, CL_device_id, CL_kernel_work_group_info, Csize_t, Ptr{Nothing}, Ptr{Csize_t})) - -#=== event object apis ===# -@ocl_func(clWaitForEvents, CL_int, - (CL_uint, Ptr{CL_event_info})) - -@ocl_func(clGetEventInfo, CL_int, - (CL_event, CL_event_info, Csize_t, Ptr{Nothing}, Ptr{Csize_t})) - -@ocl_func(clRetainEvent, CL_int, (CL_event,)) - -@ocl_func(clReleaseEvent, CL_int, (CL_event,)) - -#=== profiling apis ===# -@ocl_func(clGetEventProfilingInfo, CL_int, - (CL_event, CL_profiling_info, Csize_t, Ptr{Nothing}, Ptr{Csize_t})) - -#=== flush and finish apis ===# -@ocl_func(clFlush, CL_int, (CL_command_queue,)) - -@ocl_func(clFinish, CL_int, (CL_command_queue,)) - -#=== enqueued commands apis ===# -@ocl_func(clEnqueueReadBuffer, CL_int, - (CL_command_queue, CL_mem, CL_bool, Csize_t, Csize_t, Ptr{Nothing}, - CL_uint, Ptr{CL_event}, Ptr{CL_event})) - -@ocl_func(clEnqueueWriteBuffer, CL_int, - (CL_command_queue, CL_mem, CL_bool, - Csize_t, Csize_t, Ptr{Nothing}, CL_uint, - Ptr{CL_event}, Ptr{CL_event})) - -@ocl_func(clEnqueueCopyBuffer, CL_int, - (CL_command_queue, CL_mem, CL_mem, - Csize_t, Csize_t, Csize_t, CL_uint, Ptr{CL_event}, Ptr{CL_event})) - -@ocl_func(clEnqueueReadImage, CL_int, - (CL_command_queue, CL_mem, CL_bool, - Ptr{Csize_t}, Ptr{Csize_t}, Csize_t, Csize_t, - Ptr{Nothing}, CL_uint, Ptr{CL_event}, Ptr{CL_event})) - -@ocl_func(clEnqueueWriteImage, CL_int, - (CL_command_queue, CL_mem, CL_bool, Ptr{Csize_t}, Ptr{Csize_t}, - Csize_t, Csize_t, Ptr{Nothing}, CL_uint, 
Ptr{CL_event}, Ptr{CL_event})) - -@ocl_func(clEnqueueCopyImage, CL_int, - (CL_command_queue, CL_mem, CL_mem, Ptr{Csize_t}, Ptr{Csize_t}, Ptr{Csize_t}, - CL_uint, Ptr{CL_event}, Ptr{CL_event})) - -@ocl_func(clEnqueueCopyImageToBuffer, CL_int, - (CL_command_queue, CL_mem, CL_mem, Ptr{Csize_t}, Ptr{Csize_t}, - Csize_t, CL_uint, Ptr{CL_event}, Ptr{CL_event})) - -@ocl_func(clEnqueueCopyBufferToImage, CL_int, - (CL_command_queue, CL_mem, CL_mem, Csize_t, Ptr{Csize_t}, Ptr{Csize_t}, - CL_uint, Ptr{CL_event}, Ptr{CL_event})) - -@ocl_func(clEnqueueMapBuffer, Ptr{Nothing}, - (CL_command_queue, CL_mem, CL_bool, CL_map_flags, Csize_t, Csize_t, - CL_uint, Ptr{CL_event}, Ptr{CL_event}, Ptr{CL_int})) - -@ocl_func(clEnqueueMapImage, Ptr{Nothing}, - (CL_command_queue, CL_mem, CL_bool, CL_map_flags, - Ptr{Csize_t}, Ptr{Csize_t}, Ptr{Csize_t}, Ptr{Csize_t}, - CL_uint, Ptr{CL_event}, Ptr{CL_event}, Ptr{CL_int})) - -@ocl_func(clEnqueueUnmapMemObject, CL_int, - (CL_command_queue, CL_mem, Ptr{Nothing}, CL_uint, Ptr{CL_event}, Ptr{CL_event})) - -@ocl_func(clEnqueueNDRangeKernel, CL_int, - (CL_command_queue, CL_kernel, CL_uint, - Ptr{Csize_t}, Ptr{Csize_t}, Ptr{Csize_t}, - CL_uint, Ptr{CL_event}, Ptr{CL_event})) - -@ocl_func(clEnqueueTask, CL_int, - (CL_command_queue, CL_kernel, CL_uint, Ptr{CL_event}, Ptr{CL_event})) - -@ocl_func(clEnqueueNativeKernel, CL_int, - (CL_command_queue, Ptr{Nothing}, Csize_t, CL_uint, - Ptr{CL_mem}, Ptr{Ptr{Nothing}}, CL_uint, Ptr{CL_event}, Ptr{CL_event})) - -#== opengl interop functions ==# - -@ocl_func(clEnqueueAcquireGLObjects, CL_int, - (CL_command_queue, CL_uint, Ptr{CL_mem}, CL_uint, Ptr{CL_event}, Ptr{CL_event})) - -@ocl_func(clEnqueueReleaseGLObjects, CL_int, - (CL_command_queue, CL_uint, Ptr{CL_mem}, CL_uint, Ptr{CL_event}, Ptr{CL_event})) - -@ocl_func(clCreateFromGLBuffer, CL_mem, - (CL_context, CL_mem_flags, GL_uint, Ptr{CL_int})) - -@ocl_func(clCreateFromGLRenderbuffer, CL_mem, - (CL_context, CL_mem_flags, GL_uint, Ptr{CL_int})) - 
-@ocl_func(clCreateFromGLTexture2D, CL_mem, - (CL_context, CL_mem_flags, GL_enum, GL_int, GL_uint, Ptr{CL_int})) - -@ocl_func(clCreateFromGLTexture3D, CL_mem, - (CL_context, CL_mem_flags, GL_enum, GL_int, GL_uint, Ptr{CL_int})) - -@ocl_func(clGetGLObjectInfo, CL_int, - (CL_mem, Ptr{CL_GL_object_type}, Ptr{GL_uint})) - -@ocl_func(clGetGLTextureInfo, CL_int, - (CL_mem, CL_GL_texture_info, Csize_t, Ptr{Nothing}, Ptr{Csize_t})) diff --git a/src/api/opencl_1.1.0.jl b/src/api/opencl_1.1.0.jl deleted file mode 100644 index 7d0428f7..00000000 --- a/src/api/opencl_1.1.0.jl +++ /dev/null @@ -1,69 +0,0 @@ -#=== compiler apis ===# -@ocl_func(clUnloadCompiler, CL_int, ()) - -#=== memory object apis ===# -@ocl_func(clCreateSubBuffer, CL_mem, - (CL_mem, CL_mem_flags, CL_buffer_create_type, Ptr{Nothing}, Ptr{CL_int})) - -@ocl_func(clSetMemObjectDestructorCallback, CL_int, - (CL_mem, CL_callback, Ptr{Nothing})) - -@ocl_func(clCreateImage2D, CL_mem, - (CL_context, CL_mem_flags, Ptr{CL_image_format}, Csize_t, Csize_t, Csize_t, - Ptr{Nothing}, Ptr{CL_int})) - -@ocl_func(clCreateImage3D, CL_mem, - (CL_context, CL_mem_flags, Ptr{CL_image_format}, Csize_t, Csize_t, Csize_t, - Csize_t, Ptr{Nothing}, Ptr{CL_int})) - -#=== program object apis ===# -@ocl_func(clGetProgramInfo, CL_int, - (CL_program, CL_program_info, Csize_t, Ptr{Nothing}, Ptr{Csize_t})) - -#=== event object apis ===# -@ocl_func(clCreateUserEvent, CL_event, - (CL_context, Ptr{CL_int})) - -@ocl_func(clSetUserEventStatus, CL_int, (CL_event, CL_int)) - -@ocl_func(clSetEventCallback, CL_int, - (CL_event, CL_int, CL_callback, CL_user_data)) - -#=== enqueued commands apis ===# -@ocl_func(clEnqueueReadBufferRect, CL_int, - (CL_command_queue, CL_mem, CL_bool, - Ptr{Csize_t}, Ptr{Csize_t}, Ptr{Csize_t}, - Csize_t, Csize_t, Csize_t, Csize_t, - Ptr{Nothing}, CL_uint, Ptr{CL_event}, Ptr{CL_event})) - -@ocl_func(clEnqueueWriteBufferRect, CL_int, - (CL_command_queue, CL_mem, CL_bool, - Ptr{Csize_t}, Ptr{Csize_t}, Ptr{Csize_t}, - Csize_t, 
Csize_t, Csize_t, Csize_t, - Ptr{Nothing}, CL_uint, Ptr{CL_event}, Ptr{CL_event})) - -@ocl_func(clEnqueueCopyBufferRect, CL_int, - (CL_command_queue, CL_mem, CL_mem, - Ptr{Csize_t}, Ptr{Csize_t}, Ptr{Csize_t}, - Csize_t, Csize_t, Csize_t, Csize_t, - CL_uint, Ptr{CL_event}, Ptr{CL_event})) - -@ocl_func(clEnqueueMarker, CL_int, - (CL_command_queue, Ptr{CL_event})) - -@ocl_func(clEnqueueWaitForEvents, CL_int, - (CL_command_queue, CL_uint, Ptr{CL_event})) - -@ocl_func(clEnqueueBarrier, CL_int, - (CL_command_queue,)) - -#=== extension function access ===# -@ocl_func(clGetExtensionFunctionAddress, Ptr{Nothing}, (Ptr{Cchar},)) - -#=== opengl interop functions ===# - -@ocl_func(clGetGLContextInfoKHR, CL_int, - (Ptr{CL_context_properties}, CL_gl_context_info, Csize_t, Ptr{Nothing}, Ptr{Csize_t})) - -@ocl_func(clCreateEventFromGLsyncKHR, CL_event, - (CL_context, GL_sync, Ptr{CL_int})) diff --git a/src/api/opencl_1.2.0.jl b/src/api/opencl_1.2.0.jl deleted file mode 100644 index 21c31f07..00000000 --- a/src/api/opencl_1.2.0.jl +++ /dev/null @@ -1,72 +0,0 @@ -#=== device apis ===# -@ocl_func(clCreateSubDevices, CL_int, - (CL_device_id, CL_device_partition_property, CL_uint, Ptr{CL_device_id}, Ptr{CL_uint})) - -@ocl_func(clRetainDevice, CL_int, (CL_device_id,)) - -@ocl_func(clReleaseDevice, CL_int, (CL_device_id,)) - -#=== memory object apis ===# -@ocl_func(clCreateImage, CL_mem, - (CL_context, CL_mem_flags, CL_image_format, CL_image_desc, Ptr{Nothing}, Ptr{CL_int})) - -#=== program object apis ===# -@ocl_func(clCreateProgramWithBuiltInKernels, CL_program, - (CL_context, CL_uint, Ptr{CL_device_id}, Ptr{Cchar}, Ptr{CL_int})) - -@ocl_func(clCompileProgram, CL_int, - (CL_program, CL_uint, Ptr{CL_device_id}, Ptr{CL_device_id}, Ptr{Cchar}, - CL_uint, Ptr{CL_program}, Ptr{Ptr{Char}}, CL_callback, Ptr{Nothing})) - -@ocl_func(clLinkProgram, CL_program, - (CL_context, CL_uint, Ptr{CL_device_id}, Ptr{Cchar}, CL_uint, - CL_callback, Ptr{Nothing}, Ptr{CL_int})) - 
-@ocl_func(clUnloadPlatformCompiler, CL_int, (CL_platform_id,)) - -#=== kernel object apis ===# -@ocl_func(clGetKernelArgInfo, CL_int, - (CL_kernel, CL_uint, CL_kernel_arg_info, Csize_t, Ptr{Nothing}, Ptr{Csize_t})) - -#=== enqueued commands apis ===# -@ocl_func(clEnqueueFillBuffer, CL_int, - (CL_command_queue, CL_mem, Ptr{Nothing}, Csize_t, Csize_t, Csize_t, - CL_uint, Ptr{CL_event}, Ptr{CL_event})) - -@ocl_func(clEnqueueFillImage, CL_int, - (CL_command_queue, CL_mem, Ptr{Nothing}, Ptr{Csize_t}, Ptr{Csize_t}, - CL_uint, Ptr{CL_event}, Ptr{CL_event})) - -@ocl_func(clEnqueueMigrateMemObjects, CL_int, - (CL_command_queue, CL_uint, Ptr{CL_mem}, CL_mem_migration_flags, - CL_uint, Ptr{CL_event}, Ptr{CL_event})) - -@ocl_func(clEnqueueMarkerWithWaitList, CL_int, - (CL_command_queue, CL_uint, Ptr{CL_event}, Ptr{CL_event})) - -@ocl_func(clEnqueueBarrierWithWaitList, CL_int, - (CL_command_queue, CL_uint, Ptr{CL_event}, Ptr{CL_event})) - -#=== extension function access ===# -@ocl_func(clGetExtensionFunctionAddressForPlatform, Ptr{Nothing}, - (CL_platform_id, Ptr{Cchar})) - -#=== opengl interop functions ===# - -@ocl_func(clCreateFromGLTexture, CL_mem, - (CL_context, CL_mem_flags, GL_enum, GL_int, GL_uint, Ptr{CL_int})) - -#=== deprecation ===# - -# @deprecate clGetExtensionFunctionAddress clGetExtensionFunctionAddressForPlatform - -# @deprecate clCreateImage2D clCreateImage -# @deprecate clCreateImage3D clCreateImage - -# @deprecate clEnqueueMarker clEnqueueMarkerWithWaitList -# @deprecate clEnqueueBarrier clEnqueueMarkerWithWaitList -# @deprecate clEnqueueWaitForEvents clEnqueueMarkerWithWaitList -# @deprecate clUnloadCompiler Nothing() - -# @deprecate clCreateFromGLTexture2D clCreateFromGLTexture -# @deprecate clCreateFromGLTexture3D clCreateFromGLTexture diff --git a/src/api/opencl_2.0.0.jl b/src/api/opencl_2.0.0.jl deleted file mode 100644 index 30b173b7..00000000 --- a/src/api/opencl_2.0.0.jl +++ /dev/null @@ -1,55 +0,0 @@ -#=== memory command queue apis ===# 
-@ocl_func(clCreateCommandQueueWithProperties, CL_command_queue, - (CL_context, CL_device_id, CL_queue_properties, Ptr{CL_int})) - -#=== memory object apis ===# -@ocl_func(clCreatePipe, CL_mem, - (CL_context, CL_mem_flags, CL_uint, CL_uint, Ptr{CL_pipe_properties}, CL_int)) - -@ocl_func(clGetPipeInfo, CL_int, - (CL_mem, CL_pipe_info, Csize_t, Ptr{Nothing}, Ptr{Csize_t})) - -#=== SVM Allocation API ===# -@ocl_func(clSVMAlloc, Ptr{Nothing}, - (CL_context, CL_svm_mem_flags, Csize_t, CL_uint)) - -@ocl_func(clSVMFree, Nothing, - (CL_context, Ptr{Nothing})) - -#=== sampler apis ===# - -@ocl_func(clCreateSamplerWithProperties, CL_sampler, - (CL_context, Ptr{CL_sampler_properties}, Ptr{CL_int})) - -#=== kernel object apis ===# -@ocl_func(clSetKernelArgSVMPointer, CL_int, - (CL_kernel, CL_uint, Ptr{Nothing})) - -@ocl_func(clSetKernelExecInfo, CL_int, - (CL_kernel, CL_kernel_exec_info, Csize_t, Ptr{Nothing})) - -#=== Enqueued Commands APIs ===# -@ocl_func(clEnqueueSVMFree, CL_int, - (CL_command_queue, CL_uint, Ptr{Ptr{Nothing}}, Ptr{Nothing}, Ptr{Nothing}, - CL_uint, Ptr{CL_event}, Ptr{CL_event})) - -@ocl_func(clEnqueueSVMMemcpy, CL_int, - (CL_command_queue, CL_bool, Ptr{Nothing}, Ptr{Nothing}, Csize_t, - CL_uint, Ptr{CL_event}, Ptr{CL_event})) - -@ocl_func(clEnqueueSVMMemFill, CL_int, - (CL_command_queue, Ptr{Nothing}, Ptr{Nothing}, Csize_t, Csize_t, - CL_uint, Ptr{CL_event}, Ptr{CL_event})) - -@ocl_func(clEnqueueSVMMap, CL_int, - (CL_command_queue, CL_bool, CL_map_flags, Ptr{Nothing}, Csize_t, - CL_uint, Ptr{CL_event}, Ptr{CL_event})) - -@ocl_func(clEnqueueSVMUnmap, CL_int, - (CL_command_queue, Ptr{Nothing}, CL_uint, Ptr{CL_event}, Ptr{CL_event})) - -#=== deprecation ===# - -# @deprecate clCreateCommandQueue clCreateCommandQueueWithProperties -# @deprecate clCreateSampler clCreateSamplerWithProperties -# @deprecate clEnqueueTask diff --git a/src/array.jl b/src/array.jl index 93257ba2..c2e1d102 100644 --- a/src/array.jl +++ b/src/array.jl @@ -21,7 +21,7 @@ function 
CLArray(queue::CmdQueue, flags::Tuple{Vararg{Symbol}}, hostarray::AbstractArray{T,N}) where {T, N} ctx = context(queue) - buf = Buffer(T, ctx, flags, hostbuf=hostarray) + buf = Buffer(T, ctx, length(hostarray), flags, hostbuf=hostarray) sz = size(hostarray) CLArray(ctx, queue, buf, sz) end @@ -48,7 +48,7 @@ function Base.fill(::Type{T}, q::CmdQueue, x::T, dims...) where T buf = Buffer(T, ctx, prod(dims)) fill!(q, buf, x) else - buf = Buffer(T, ctx, (:rw, :copy), prod(dims), hostbuf=fill(x, dims)) + buf = Buffer(T, ctx, prod(dims), (:rw, :copy), hostbuf=fill(x, dims)) end return CLArray(buf, q, dims) end diff --git a/src/buffer.jl b/src/buffer.jl index e6ab8619..810aa98d 100644 --- a/src/buffer.jl +++ b/src/buffer.jl @@ -2,16 +2,16 @@ mutable struct Buffer{T} <: CLMemObject valid::Bool - id::CL_mem + id::cl_mem len::Int mapped::Bool hostbuf::Ptr{T} - function Buffer{T}(mem_id::CL_mem, retain::Bool, len::Integer) where T #hostbuf + function Buffer{T}(mem_id::cl_mem, retain::Bool, len::Integer) where T #hostbuf @assert len > 0 @assert mem_id != C_NULL if retain - @check api.clRetainMemObject(mem_id) + clRetainMemObject(mem_id) end nbytes = sizeof(T) * len buff = new{T}(true, mem_id, len, false, C_NULL) @@ -39,16 +39,19 @@ Base.show(io::IO, b::Buffer{T}) where {T} = begin print(io, "Buffer{$T}(@$ptr_address)") end -# high level Buffer constructors with symbol flags -function Buffer(::Type{T}, ctx::Context, len::Integer=0; hostbuf=nothing) where T - Buffer(T, ctx, (:rw, :null), len, hostbuf=hostbuf) +# XXX: conflict between integer flags and length. +# design is messy. probably best move all flags into a kwarg? 
+ +# high level Buffer constructors with symbol flags +function Buffer(::Type{T}, ctx::Context, len::Integer; hostbuf=nothing) where T + Buffer(T, ctx, len, (:rw, :null), hostbuf=hostbuf) end -function Buffer(::Type{T}, ctx::Context, mem_flag::Symbol, len::Integer=0; hostbuf=nothing) where T - Buffer(T, ctx, (mem_flag, :null), len, hostbuf=hostbuf) +function Buffer(::Type{T}, ctx::Context, len::Integer, mem_flag::Symbol; hostbuf=nothing) where T + Buffer(T, ctx, len, (mem_flag, :null), hostbuf=hostbuf) end -function Buffer(::Type{T}, ctx::Context, mem_flags::NTuple{2, Symbol}, len::Integer=0; hostbuf=nothing) where T +function Buffer(::Type{T}, ctx::Context, len::Integer, mem_flags::NTuple{2, Symbol}; hostbuf=nothing) where T f_r = :r in mem_flags f_w = :w in mem_flags f_rw = :rw in mem_flags @@ -57,7 +60,7 @@ function Buffer(::Type{T}, ctx::Context, mem_flags::NTuple{2, Symbol}, len::Inte throw(ArgumentError("only one flag in {:r, :w, :rw} can be defined")) end - local flags::CL_mem_flags + local flags::cl_mem_flags if f_rw && !(f_r || f_w) flags = CL_MEM_READ_WRITE elseif f_r && !(f_w || f_rw) @@ -83,12 +86,12 @@ function Buffer(::Type{T}, ctx::Context, mem_flags::NTuple{2, Symbol}, len::Inte elseif f_copy && !(f_alloc || f_use) flags |= CL_MEM_COPY_HOST_PTR end - return Buffer(T, ctx, flags, len, hostbuf=hostbuf) + return Buffer(T, ctx, len, flags, hostbuf=hostbuf) end # low level Buffer constructor with integer parameter flags -function Buffer(::Type{T}, ctx::Context, flags::CL_mem_flags, - len::Integer=0; hostbuf::Union{Nothing,Array{T}}=nothing) where T +function Buffer(::Type{T}, ctx::Context, len::Integer, flags; + hostbuf::Union{Nothing,Array{T}}=nothing) where T if (hostbuf !== nothing && (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) == 0) @@ -122,8 +125,8 @@ function Buffer(::Type{T}, ctx::Context, flags::CL_mem_flags, nbytes = len * sizeof(T) end - err_code = Ref{CL_int}() - mem_id = api.clCreateBuffer(ctx.id, flags, cl_uint(nbytes), + err_code 
= Ref{Cint}() + mem_id = clCreateBuffer(ctx.id, flags, cl_uint(nbytes), hostbuf !== nothing ? hostbuf : C_NULL, err_code) if err_code[] != CL_SUCCESS @@ -133,7 +136,7 @@ function Buffer(::Type{T}, ctx::Context, flags::CL_mem_flags, try return Buffer{T}(mem_id, false, len) catch err - api.clReleaseMemObject(mem_id) + clReleaseMemObject(mem_id) throw(err) end end @@ -147,10 +150,10 @@ function enqueue_read_buffer(q::CmdQueue, is_blocking::Bool) where T n_evts = wait_for === nothing ? UInt(0) : length(wait_for) evt_ids = wait_for === nothing ? C_NULL : [evt.id for evt in wait_for] - ret_evt = Ref{CL_event}() + ret_evt = Ref{cl_event}() nbytes = sizeof(hostbuf) @assert nbytes > 0 - @check api.clEnqueueReadBuffer(q.id, buf.id, cl_bool(is_blocking), + clEnqueueReadBuffer(q.id, buf.id, cl_bool(is_blocking), dev_offset, nbytes, hostbuf, n_evts, evt_ids, ret_evt) @return_nanny_event(ret_evt[], hostbuf) @@ -166,10 +169,10 @@ function enqueue_write_buffer(q::CmdQueue, is_blocking::Bool) where T n_evts = wait_for === nothing ? UInt(0) : length(wait_for) evt_ids = wait_for === nothing ? C_NULL : [evt.id for evt in wait_for] - ret_evt = Ref{CL_event}() + ret_evt = Ref{cl_event}() nbytes = sizeof(hostbuf) @assert nbytes > 0 - @check api.clEnqueueWriteBuffer(q.id, buf.id, cl_bool(is_blocking), + clEnqueueWriteBuffer(q.id, buf.id, cl_bool(is_blocking), offset, nbytes, hostbuf, n_evts, evt_ids, ret_evt) @return_nanny_event(ret_evt[], hostbuf) @@ -185,18 +188,18 @@ function enqueue_copy_buffer(q::CmdQueue, wait_for::Union{Nothing,Vector{Event}}) where T n_evts = wait_for === nothing ? UInt(0) : length(wait_for) evt_ids = wait_for === nothing ? 
C_NULL : [evt.id for evt in wait_for] - ret_evt = Ref{CL_event}() + ret_evt = Ref{cl_event}() if byte_count < 0 byte_count_src = Ref{Csize_t}() byte_count_dst = Ref{Csize_t}() - @check api.clGetMemObjectInfo(src.id, CL_MEM_SIZE, sizeof(Csize_t), + clGetMemObjectInfo(src.id, CL_MEM_SIZE, sizeof(Csize_t), byte_count_src, C_NULL) - @check api.clGetMemObjectInfo(src.id, CL_MEM_SIZE, sizeof(Csize_t), + clGetMemObjectInfo(src.id, CL_MEM_SIZE, sizeof(Csize_t), byte_count_dst, C_NULL) byte_count = min(byte_count_src[], byte_count_dst[]) end @assert byte_count > 0 - @check api.clEnqueueCopyBuffer(q.id, src.id, dst.id, + clEnqueueCopyBuffer(q.id, src.id, dst.id, src_offset, dst_offset, byte_count, n_evts, evt_ids, ret_evt) @return_event ret_evt[] @@ -228,8 +231,8 @@ function enqueue_unmap_mem(q::CmdQueue, evt_ids = [evt.id for evt in wait_for] end end - ret_evt = Ref{CL_event}() - @check api.clEnqueueUnmapMemObject(q.id, b.id, a, + ret_evt = Ref{cl_event}() + clEnqueueUnmapMemObject(q.id, b.id, a, n_evts, evt_ids, ret_evt) b.mapped = false b.hostbuf = C_NULL @@ -251,7 +254,7 @@ function enqueue_map_mem(q::CmdQueue, dims::Dims, wait_for=nothing, is_blocking=false) where T - local f::CL_map_flags + local f::cl_map_flags if flags === :r f = CL_MAP_READ elseif flags === :w @@ -267,7 +270,7 @@ end # enqueue a memory mapping operation, returning a mapped (pinned) Array and an event function enqueue_map_mem(q::CmdQueue, b::Buffer{T}, - flags::CL_map_flags, + flags::cl_map_flags, offset::Integer, dims::Dims, wait_for=nothing, @@ -281,9 +284,9 @@ function enqueue_map_mem(q::CmdQueue, flags = cl_map_flags(flags) offset = unsigned(offset) nbytes = unsigned(prod(dims) * sizeof(T)) - ret_evt = Ref{CL_event}() + ret_evt = Ref{cl_event}() status = Ref{Cint}() - mapped = api.clEnqueueMapBuffer(q.id, b.id, cl_bool(is_blocking ? 1 : 0), + mapped = clEnqueueMapBuffer(q.id, b.id, cl_bool(is_blocking ? 
1 : 0), flags, offset, nbytes, n_evts, evt_ids, ret_evt, status) if status[] != CL_SUCCESS @@ -304,7 +307,7 @@ function enqueue_map_mem(q::CmdQueue, end end catch err - api.clEnqueueUnmapMemObject(q.id, b.id, mapped, + clEnqueueUnmapMemObject(q.id, b.id, mapped, unsigned(0), C_NULL, C_NULL) b.mapped = false b.hostbuf = C_NULL @@ -327,10 +330,10 @@ end evt_ids = [evt.id for evt in wait_for] n_evts = cl_uint(length(evt_ids)) end - ret_evt = Ref{CL_event}() + ret_evt = Ref{cl_event}() nbytes_pattern = sizeof(pattern) @assert nbytes_pattern > 0 - @check api.clEnqueueFillBuffer(q.id, buf.id, [pattern], + clEnqueueFillBuffer(q.id, buf.id, [pattern], unsigned(nbytes_pattern), offset, nbytes, n_evts, evt_ids, ret_evt) @return_event ret_evt[] @@ -396,18 +399,18 @@ function empty_like(ctx::Context, b::Buffer{T}) where T len = length(b) mf = info(b, :mem_flags) if :r in mf - return Buffer(T, ctx, :r, len) + return Buffer(T, ctx, len, :r) elseif :w in mf - return Buffer(T, ctx, :w, len) + return Buffer(T, ctx, len, :w) else - return Buffer(T, ctx, :rw, len) + return Buffer(T, ctx, len, :rw) end end # create an empty buffer similar to the passed in Array function empty_like(ctx::Context, a::Array{T}, flag::Symbol=:rw) where T len = length(a) - return Buffer(T, ctx, flag, len) + return Buffer(T, ctx, len, flag) end # blocking write of contents of an array to a buffer diff --git a/src/constants.jl b/src/constants.jl deleted file mode 100644 index 19f92eaa..00000000 --- a/src/constants.jl +++ /dev/null @@ -1,522 +0,0 @@ -# Error Codes -const CL_SUCCESS = 0 -const CL_DEVICE_NOT_FOUND = -1 -const CL_DEVICE_NOT_AVAILABLE = -2 -const CL_COMPILER_NOT_AVAILABLE = -3 -const CL_MEM_OBJECT_ALLOCATION_FAILURE = -4 -const CL_OUT_OF_RESOURCES = -5 -const CL_OUT_OF_HOST_MEMORY = -6 -const CL_PROFILING_INFO_NOT_AVAILABLE = -7 -const CL_MEM_COPY_OVERLAP = -8 -const CL_IMAGE_FORMAT_MISMATCH = -9 -const CL_IMAGE_FORMAT_NOT_SUPPORTED = -10 -const CL_BUILD_PROGRAM_FAILURE = -11 -const CL_MAP_FAILURE 
= -12 -const CL_MISALIGNED_SUB_BUFFER_OFFSET = -13 -const CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST = -14 -const CL_COMPILE_PROGRAM_FAILURE = -15 -const CL_LINKER_NOT_AVAILABLE = -15 -const CL_LINK_PROGRAM_FAILURE = -17 -const CL_DEVICE_PARTITION_FAILED = -18 -const CL_KERNEL_ARG_INFO_NOT_AVAILABLE = -19 - -const CL_INVALID_VALUE = -30 -const CL_INVALID_DEVICE_TYPE = -31 -const CL_INVALID_PLATFORM = -32 -const CL_INVALID_DEVICE = -33 -const CL_INVALID_CONTEXT = -34 -const CL_INVALID_QUEUE_PROPERTIES = -35 -const CL_INVALID_COMMAND_QUEUE = -36 -const CL_INVALID_HOST_PTR = -37 -const CL_INVALID_MEM_OBJECT = -38 -const CL_INVALID_IMAGE_FORMAT_DESCRIPTOR = -39 -const CL_INVALID_IMAGE_SIZE = -40 -const CL_INVALID_SAMPLER = -41 -const CL_INVALID_BINARY = -42 -const CL_INVALID_BUILD_OPTIONS = -43 -const CL_INVALID_PROGRAM = -44 -const CL_INVALID_PROGRAM_EXECUTABLE = -45 -const CL_INVALID_KERNEL_NAME = -46 -const CL_INVALID_KERNEL_DEFINITION = -47 -const CL_INVALID_KERNEL = -48 -const CL_INVALID_ARG_INDEX = -49 -const CL_INVALID_ARG_VALUE = -50 -const CL_INVALID_ARG_SIZE = -51 -const CL_INVALID_KERNEL_ARGS = -52 -const CL_INVALID_WORK_DIMENSION = -53 -const CL_INVALID_WORK_GROUP_SIZE = -54 -const CL_INVALID_WORK_ITEM_SIZE = -55 -const CL_INVALID_GLOBAL_OFFSET = -56 -const CL_INVALID_EVENT_WAIT_LIST = -57 -const CL_INVALID_EVENT = -58 -const CL_INVALID_OPERATION = -59 -const CL_INVALID_GL_OBJECT = -60 -const CL_INVALID_BUFFER_SIZE = -61 -const CL_INVALID_MIP_LEVEL = -62 -const CL_INVALID_GLOBAL_WORK_SIZE = -63 -const CL_INVALID_PROPERTY = -64 -const CL_INVALID_IMAGE_DESCRIPTOR = -65 -const CL_INVALID_COMPILER_OPTIONS = -66 -const CL_INVALID_LINKER_OPTIONS = -67 -const CL_INVALID_DEVICE_PARTITION_COUNT = -68 -const CL_INVALID_PIPE_SIZE = -69 -const CL_INVALID_DEVICE_QUEUE = -70 - -# OpenCL Version -const CL_VERSION_1_0 = cl_bool(1) -const CL_VERSION_1_1 = cl_bool(1) -const CL_VERSION_1_2 = cl_bool(1) -const CL_VERSION_2_0 = cl_bool(1) - -# cl_bool -const CL_FALSE = 
cl_bool(0) -const CL_TRUE = cl_bool(1) -const CL_BLOCKING = CL_TRUE -const CL_NON_BLOCKING = CL_FALSE - -# cl_platform_info -const CL_PLATFORM_PROFILE = cl_uint(0x0900) -const CL_PLATFORM_VERSION = cl_uint(0x0901) -const CL_PLATFORM_NAME = cl_uint(0x0902) -const CL_PLATFORM_VENDOR = cl_uint(0x0903) -const CL_PLATFORM_EXTENSIONS = cl_uint(0x0904) - -# cl_device_type - bitfield -const CL_DEVICE_TYPE_DEFAULT = cl_bitfield(1 << 0) -const CL_DEVICE_TYPE_CPU = cl_bitfield(1 << 1) -const CL_DEVICE_TYPE_GPU = cl_bitfield(1 << 2) -const CL_DEVICE_TYPE_ACCELERATOR = cl_bitfield(1 << 3) -const CL_DEVICE_TYPE_CUSTOM = cl_bitfield(1 << 4) -const CL_DEVICE_TYPE_ALL = cl_bitfield(0xFFFFFFFF) - -# cl_device_info -const CL_DEVICE_TYPE = cl_uint(0x1000) -const CL_DEVICE_VENDOR_ID = cl_uint(0x1001) -const CL_DEVICE_MAX_COMPUTE_UNITS = cl_uint(0x1002) -const CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS = cl_uint(0x1003) -const CL_DEVICE_MAX_WORK_GROUP_SIZE = cl_uint(0x1004) -const CL_DEVICE_MAX_WORK_ITEM_SIZES = cl_uint(0x1005) -const CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR = cl_uint(0x1006) -const CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT = cl_uint(0x1007) -const CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT = cl_uint(0x1008) -const CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG = cl_uint(0x1009) -const CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT = cl_uint(0x100A) -const CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE = cl_uint(0x100B) -const CL_DEVICE_MAX_CLOCK_FREQUENCY = cl_uint(0x100C) -const CL_DEVICE_ADDRESS_BITS = cl_uint(0x100D) -const CL_DEVICE_MAX_READ_IMAGE_ARGS = cl_uint(0x100E) -const CL_DEVICE_MAX_WRITE_IMAGE_ARGS = cl_uint(0x100F) -const CL_DEVICE_MAX_MEM_ALLOC_SIZE = cl_uint(0x1010) -const CL_DEVICE_IMAGE2D_MAX_WIDTH = cl_uint(0x1011) -const CL_DEVICE_IMAGE2D_MAX_HEIGHT = cl_uint(0x1012) -const CL_DEVICE_IMAGE3D_MAX_WIDTH = cl_uint(0x1013) -const CL_DEVICE_IMAGE3D_MAX_HEIGHT = cl_uint(0x1014) -const CL_DEVICE_IMAGE3D_MAX_DEPTH = cl_uint(0x1015) -const CL_DEVICE_IMAGE_SUPPORT = cl_uint(0x1016) -const 
CL_DEVICE_MAX_PARAMETER_SIZE = cl_uint(0x1017) -const CL_DEVICE_MAX_SAMPLERS = cl_uint(0x1018) -const CL_DEVICE_MEM_BASE_ADDR_ALIGN = cl_uint(0x1019) -const CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE = cl_uint(0x101A) -const CL_DEVICE_SINGLE_FP_CONFIG = cl_uint(0x101B) -const CL_DEVICE_GLOBAL_MEM_CACHE_TYPE = cl_uint(0x101C) -const CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE = cl_uint(0x101D) -const CL_DEVICE_GLOBAL_MEM_CACHE_SIZE = cl_uint(0x101E) -const CL_DEVICE_GLOBAL_MEM_SIZE = cl_uint(0x101F) -const CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE = cl_uint(0x1020) -const CL_DEVICE_MAX_CONSTANT_ARGS = cl_uint(0x1021) -const CL_DEVICE_LOCAL_MEM_TYPE = cl_uint(0x1022) -const CL_DEVICE_LOCAL_MEM_SIZE = cl_uint(0x1023) -const CL_DEVICE_ERROR_CORRECTION_SUPPORT = cl_uint(0x1024) -const CL_DEVICE_PROFILING_TIMER_RESOLUTION = cl_uint(0x1025) -const CL_DEVICE_ENDIAN_LITTLE = cl_uint(0x1026) -const CL_DEVICE_AVAILABLE = cl_uint(0x1027) -const CL_DEVICE_COMPILER_AVAILABLE = cl_uint(0x1028) -const CL_DEVICE_EXECUTION_CAPABILITIES = cl_uint(0x1029) -const CL_DEVICE_QUEUE_PROPERTIES = cl_uint(0x102A) # deprecated -const CL_DEVICE_QUEUE_ON_HOST_PROPERTIES = cl_uint(0x102A) -const CL_DEVICE_NAME = cl_uint(0x102B) -const CL_DEVICE_VENDOR = cl_uint(0x102C) -const CL_DRIVER_VERSION = cl_uint(0x102D) -const CL_DEVICE_PROFILE = cl_uint(0x102E) -const CL_DEVICE_VERSION = cl_uint(0x102F) -const CL_DEVICE_EXTENSIONS = cl_uint(0x1030) -const CL_DEVICE_PLATFORM = cl_uint(0x1031) -const CL_DEVICE_DOUBLE_FP_CONFIG = cl_uint(0x1032) - -# 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG -const CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF = cl_uint(0x1034) -const CL_DEVICE_HOST_UNIFIED_MEMORY = cl_uint(0x1035) # deprecated -const CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR = cl_uint(0x1036) -const CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT = cl_uint(0x1037) -const CL_DEVICE_NATIVE_VECTOR_WIDTH_INT = cl_uint(0x1038) -const CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG = cl_uint(0x1039) -const CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT = cl_uint(0x103A) -const 
CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE = cl_uint(0x103B) -const CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF = cl_uint(0x103C) -const CL_DEVICE_OPENCL_C_VERSION = cl_uint(0x103D) -const CL_DEVICE_LINKER_AVAILABLE = cl_uint(0x103E) -const CL_DEVICE_BUILT_IN_KERNELS = cl_uint(0x103F) -const CL_DEVICE_IMAGE_MAX_BUFFER_SIZE = cl_uint(0x1040) -const CL_DEVICE_IMAGE_MAX_ARRAY_SIZE = cl_uint(0x1041) -const CL_DEVICE_PARENT_DEVICE = cl_uint(0x1042) -const CL_DEVICE_PARTITION_MAX_SUB_DEVICES = cl_uint(0x1043) -const CL_DEVICE_PARTITION_PROPERTIES = cl_uint(0x1044) -const CL_DEVICE_PARTITION_AFFINITY_DOMAIN = cl_uint(0x1045) -const CL_DEVICE_PARTITION_TYPE = cl_uint(0x1046) -const CL_DEVICE_REFERENCE_COUNT = cl_uint(0x1047) -const CL_DEVICE_PREFERRED_INTEROP_USER_SYNC = cl_uint(0x1048) -const CL_DEVICE_PRINTF_BUFFER_SIZE = cl_uint(0x1049) -const CL_DEVICE_IMAGE_PITCH_ALIGNMENT = cl_uint(0x104A) -const CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT = cl_uint(0x104B) -const CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS = cl_uint(0x104C) -const CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE = cl_uint(0x104D) -const CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES = cl_uint(0x104E) -const CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE = cl_uint(0x104F) -const CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE = cl_uint(0x1050) -const CL_DEVICE_MAX_ON_DEVICE_QUEUES = cl_uint(0x1051) -const CL_DEVICE_MAX_ON_DEVICE_EVENTS = cl_uint(0x1052) -const CL_DEVICE_SVM_CAPABILITIES = cl_uint(0x1053) -const CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE = cl_uint(0x1054) -const CL_DEVICE_MAX_PIPE_ARGS = cl_uint(0x1055) -const CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS = cl_uint(0x1056) -const CL_DEVICE_PIPE_MAX_PACKET_SIZE = cl_uint(0x1057) -const CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT = cl_uint(0x1058) -const CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT = cl_uint(0x1059) -const CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT = cl_uint(0x105A) - -# cl_device_fp_config - bitfield -const CL_FP_DENORM = cl_bitfield(1 << 0) -const CL_FP_INF_NAN = cl_bitfield(1 << 1) -const 
CL_FP_ROUND_TO_NEAREST = cl_bitfield(1 << 2) -const CL_FP_ROUND_TO_ZERO = cl_bitfield(1 << 3) -const CL_FP_ROUND_TO_INF = cl_bitfield(1 << 4) -const CL_FP_FMA = cl_bitfield(1 << 5) -const CL_FP_SOFT_FLOAT = cl_bitfield(1 << 6) -const CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT = cl_bitfield(1 << 7) - -# cl_device_mem_cache_type -const CL_NONE = cl_uint(0x0) -const CL_READ_ONLY_CACHE = cl_uint(0x1) -const CL_READ_WRITE_CACHE = cl_uint(0x2) - -# cl_device_local_mem_type -const CL_LOCAL = cl_uint(0x1) -const CL_GLOBAL = cl_uint(0x2) - -# cl_device_exec_capabilities - bitfield -const CL_EXEC_KERNEL = cl_bitfield(1 << 0) -const CL_EXEC_NATIVE_KERNEL = cl_bitfield(1 << 1) - -# cl_command_queue_properties - bitfield -const CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE = cl_bitfield(1 << 0) -const CL_QUEUE_PROFILING_ENABLE = cl_bitfield(1 << 1) -const CL_QUEUE_ON_DEVICE = cl_bitfield(1 << 2) -const CL_QUEUE_ON_DEVICE_DEFAULT = cl_bitfield(1 << 3) - -# cl_context_info -const CL_CONTEXT_REFERENCE_COUNT = cl_uint(0x1080) -const CL_CONTEXT_DEVICES = cl_uint(0x1081) -const CL_CONTEXT_PROPERTIES = cl_uint(0x1082) -const CL_CONTEXT_NUM_DEVICES = cl_uint(0x1083) - -# cl_context_properties -const CL_CONTEXT_PLATFORM = cl_uint(0x1084) -const CL_CONTEXT_INTEROP_USER_SYNC = cl_uint(0x1085) - -# cl_device_partition_property -const CL_DEVICE_PARTITION_EQUALLY = cl_uint(0x1086) -const CL_DEVICE_PARTITION_BY_COUNTS = cl_uint(0x1087) -const CL_DEVICE_PARTITION_BY_COUNTS_LIST_END = cl_uint(0x0) -const CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN = cl_uint(0x1088) - -# cl_device_affinity_domain -const CL_DEVICE_AFFINITY_DOMAIN_NUMA = cl_bitfield(1 << 0) -const CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE = cl_bitfield(1 << 1) -const CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE = cl_bitfield(1 << 2) -const CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE = cl_bitfield(1 << 3) -const CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE = cl_bitfield(1 << 4) -const CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE = cl_bitfield(1 << 5) - -# cl_device_svm_capabilities 
-const CL_DEVICE_SVM_COARSE_GRAIN_BUFFER = cl_bitfield(1 << 0) -const CL_DEVICE_SVM_FINE_GRAIN_BUFFER = cl_bitfield(1 << 1) -const CL_DEVICE_SVM_FINE_GRAIN_SYSTEM = cl_bitfield(1 << 2) -const CL_DEVICE_SVM_ATOMICS = cl_bitfield(1 << 3) - -# cl_command_queue_info -const CL_QUEUE_CONTEXT = cl_uint(0x1090) -const CL_QUEUE_DEVICE = cl_uint(0x1091) -const CL_QUEUE_REFERENCE_COUNT = cl_uint(0x1092) -const CL_QUEUE_PROPERTIES = cl_uint(0x1093) -const CL_QUEUE_SIZE = cl_uint(0x1094) - -# cl_mem_flags and cl_svm_mem_flags - bitfield -const CL_MEM_READ_WRITE = cl_bitfield(1 << 0) -const CL_MEM_WRITE_ONLY = cl_bitfield(1 << 1) -const CL_MEM_READ_ONLY = cl_bitfield(1 << 2) -const CL_MEM_USE_HOST_PTR = cl_bitfield(1 << 3) -const CL_MEM_ALLOC_HOST_PTR = cl_bitfield(1 << 4) -const CL_MEM_COPY_HOST_PTR = cl_bitfield(1 << 5) -# //reserved = (1 << 6) -const CL_MEM_HOST_WRITE_ONLY = cl_bitfield(1 << 7) -const CL_MEM_HOST_READ_ONLY = cl_bitfield(1 << 8) -const CL_MEM_HOST_NO_ACCESS = cl_bitfield(1 << 9) -const CL_MEM_SVM_FINE_GRAIN_BUFFER = cl_bitfield(1 << 10) # used by cl_svm_mem_flags only -const CL_MEM_SVM_ATOMICS = cl_bitfield(1 << 11) # used by cl_svm_mem_flags only -const CL_MEM_KERNEL_READ_AND_WRITE = cl_bitfield(1 << 12) - -# cl_mem_migration_flags - bitfield -const CL_MIGRATE_MEM_OBJECT_HOST = cl_bitfield(1 << 0) -const CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED = cl_bitfield(1 << 1) - -# cl_channel_order -const CL_R = cl_uint(0x10B0) -const CL_A = cl_uint(0x10B1) -const CL_RG = cl_uint(0x10B2) -const CL_RA = cl_uint(0x10B3) -const CL_RGB = cl_uint(0x10B4) -const CL_RGBA = cl_uint(0x10B5) -const CL_BGRA = cl_uint(0x10B6) -const CL_ARGB = cl_uint(0x10B7) -const CL_INTENSITY = cl_uint(0x10B8) -const CL_LUMINANCE = cl_uint(0x10B9) -const CL_Rx = cl_uint(0x10BA) -const CL_RGx = cl_uint(0x10BB) -const CL_RGBx = cl_uint(0x10BC) -const CL_DEPTH = cl_uint(0x10BD) -const CL_DEPTH_STENCIL = cl_uint(0x10BE) -const CL_sRGB = cl_uint(0x10BF) -const CL_sRGBx = cl_uint(0x10C0) -const CL_sRGBA 
= cl_uint(0x10C1) -const CL_sBGRA = cl_uint(0x10C2) -const CL_ABGR = cl_uint(0x10C3) - -# cl_channel_type -const CL_SNORM_INT8 = cl_uint(0x10D0) -const CL_SNORM_INT16 = cl_uint(0x10D1) -const CL_UNORM_INT8 = cl_uint(0x10D2) -const CL_UNORM_INT16 = cl_uint(0x10D3) -const CL_UNORM_SHORT_565 = cl_uint(0x10D4) -const CL_UNORM_SHORT_555 = cl_uint(0x10D5) -const CL_UNORM_INT_101010 = cl_uint(0x10D6) -const CL_SIGNED_INT8 = cl_uint(0x10D7) -const CL_SIGNED_INT16 = cl_uint(0x10D8) -const CL_SIGNED_INT32 = cl_uint(0x10D9) -const CL_UNSIGNED_INT8 = cl_uint(0x10DA) -const CL_UNSIGNED_INT16 = cl_uint(0x10DB) -const CL_UNSIGNED_INT32 = cl_uint(0x10DC) -const CL_HALF_FLOAT = cl_uint(0x10DD) -const CL_FLOAT = cl_uint(0x10DE) -const CL_UNORM_INT24 = cl_uint(0x10DF) - -# cl_mem_object_type -const CL_MEM_OBJECT_BUFFER = cl_uint(0x10F0) -const CL_MEM_OBJECT_IMAGE2D = cl_uint(0x10F1) -const CL_MEM_OBJECT_IMAGE3D = cl_uint(0x10F2) -const CL_MEM_OBJECT_IMAGE2D_ARRAY = cl_uint(0x10F3) -const CL_MEM_OBJECT_IMAGE1D = cl_uint(0x10F4) -const CL_MEM_OBJECT_IMAGE1D_ARRAY = cl_uint(0x10F5) -const CL_MEM_OBJECT_IMAGE1D_BUFFER = cl_uint(0x10F6) -const CL_MEM_OBJECT_PIPE = cl_uint(0x10F7) - -# cl_mem_info -const CL_MEM_TYPE = cl_uint(0x1100) -const CL_MEM_FLAGS = cl_uint(0x1101) -const CL_MEM_SIZE = cl_uint(0x1102) -const CL_MEM_HOST_PTR = cl_uint(0x1103) -const CL_MEM_MAP_COUNT = cl_uint(0x1104) -const CL_MEM_REFERENCE_COUNT = cl_uint(0x1105) -const CL_MEM_CONTEXT = cl_uint(0x1106) -const CL_MEM_ASSOCIATED_MEMOBJECT = cl_uint(0x1107) -const CL_MEM_OFFSET = cl_uint(0x1108) -const CL_MEM_USES_SVM_POINTER = cl_uint(0x1109) - -# cl_image_info -const CL_IMAGE_FORMAT = cl_uint(0x1110) -const CL_IMAGE_ELEMENT_SIZE = cl_uint(0x1111) -const CL_IMAGE_ROW_PITCH = cl_uint(0x1112) -const CL_IMAGE_SLICE_PITCH = cl_uint(0x1113) -const CL_IMAGE_WIDTH = cl_uint(0x1114) -const CL_IMAGE_HEIGHT = cl_uint(0x1115) -const CL_IMAGE_DEPTH = cl_uint(0x1116) -const CL_IMAGE_ARRAY_SIZE = cl_uint(0x1117) -const 
CL_IMAGE_BUFFER = cl_uint(0x1118) -const CL_IMAGE_NUM_MIP_LEVELS = cl_uint(0x1119) -const CL_IMAGE_NUM_SAMPLES = cl_uint(0x111A) - -# cl_pipe_info -const CL_PIPE_PACKET_SIZE = cl_uint(0x1120) -const CL_PIPE_MAX_PACKETS = cl_uint(0x1121) - -# cl_addressing_mode -const CL_ADDRESS_NONE = cl_uint(0x1130) -const CL_ADDRESS_CLAMP_TO_EDGE = cl_uint(0x1131) -const CL_ADDRESS_CLAMP = cl_uint(0x1132) -const CL_ADDRESS_REPEAT = cl_uint(0x1133) -const CL_ADDRESS_MIRRORED_REPEAT = cl_uint(0x1134) - -# cl_filter_mode -const CL_FILTER_NEAREST = cl_uint(0x1140) -const CL_FILTER_LINEAR = cl_uint(0x1141) - -# cl_sampler_info -const CL_SAMPLER_REFERENCE_COUNT = cl_uint(0x1150) -const CL_SAMPLER_CONTEXT = cl_uint(0x1151) -const CL_SAMPLER_NORMALIZED_COORDS = cl_uint(0x1152) -const CL_SAMPLER_ADDRESSING_MODE = cl_uint(0x1153) -const CL_SAMPLER_FILTER_MODE = cl_uint(0x1154) -const CL_SAMPLER_MIP_FILTER_MODE = cl_uint(0x1155) -const CL_SAMPLER_LOD_MIN = cl_uint(0x1156) -const CL_SAMPLER_LOD_MAX = cl_uint(0x1157) - -# cl_map_flags - bitfield -const CL_MAP_READ = cl_bitfield(1 << 0) -const CL_MAP_WRITE = cl_bitfield(1 << 1) -const CL_MAP_WRITE_INVALIDATE_REGION = cl_bitfield(1 << 2) - -# cl_program_info -const CL_PROGRAM_REFERENCE_COUNT = cl_uint(0x1160) -const CL_PROGRAM_CONTEXT = cl_uint(0x1161) -const CL_PROGRAM_NUM_DEVICES = cl_uint(0x1162) -const CL_PROGRAM_DEVICES = cl_uint(0x1163) -const CL_PROGRAM_SOURCE = cl_uint(0x1164) -const CL_PROGRAM_BINARY_SIZES = cl_uint(0x1165) -const CL_PROGRAM_BINARIES = cl_uint(0x1166) -const CL_PROGRAM_NUM_KERNELS = cl_uint(0x1167) -const CL_PROGRAM_KERNEL_NAMES = cl_uint(0x1168) - -# cl_program_build_info -const CL_PROGRAM_BUILD_STATUS = cl_uint(0x1181) -const CL_PROGRAM_BUILD_OPTIONS = cl_uint(0x1182) -const CL_PROGRAM_BUILD_LOG = cl_uint(0x1183) -const CL_PROGRAM_BINARY_TYPE = cl_uint(0x1184) -const CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE = cl_uint(0x1185) - -# cl_program_binary_type -const CL_PROGRAM_BINARY_TYPE_NONE = cl_uint(0x0) -const 
CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT = cl_uint(0x1) -const CL_PROGRAM_BINARY_TYPE_LIBRARY = cl_uint(0x2) -const CL_PROGRAM_BINARY_TYPE_EXECUTABLE = cl_uint(0x4) - -# cl_build_status -const CL_BUILD_SUCCESS = 0 -const CL_BUILD_NONE = -1 -const CL_BUILD_ERROR = -2 -const CL_BUILD_IN_PROGRESS = -3 - -# cl_kernel_info -const CL_KERNEL_FUNCTION_NAME = cl_uint(0x1190) -const CL_KERNEL_NUM_ARGS = cl_uint(0x1191) -const CL_KERNEL_REFERENCE_COUNT = cl_uint(0x1192) -const CL_KERNEL_CONTEXT = cl_uint(0x1193) -const CL_KERNEL_PROGRAM = cl_uint(0x1194) -const CL_KERNEL_ATTRIBUTES = cl_uint(0x1195) - -# cl_kernel_arg_info -const CL_KERNEL_ARG_ADDRESS_QUALIFIER = cl_uint(0x1196) -const CL_KERNEL_ARG_ACCESS_QUALIFIER = cl_uint(0x1197) -const CL_KERNEL_ARG_TYPE_NAME = cl_uint(0x1198) -const CL_KERNEL_ARG_TYPE_QUALIFIER = cl_uint(0x1199) -const CL_KERNEL_ARG_NAME = cl_uint(0x119A) - -# cl_kernel_arg_address_qualifier -const CL_KERNEL_ARG_ADDRESS_GLOBAL = cl_uint(0x119B) -const CL_KERNEL_ARG_ADDRESS_LOCAL = cl_uint(0x119C) -const CL_KERNEL_ARG_ADDRESS_CONSTANT = cl_uint(0x119D) -const CL_KERNEL_ARG_ADDRESS_PRIVATE = cl_uint(0x119E) - -# cl_kernel_arg_access_qualifier -const CL_KERNEL_ARG_ACCESS_READ_ONLY = cl_uint(0x11A0) -const CL_KERNEL_ARG_ACCESS_WRITE_ONLY = cl_uint(0x11A1) -const CL_KERNEL_ARG_ACCESS_READ_WRITE = cl_uint(0x11A2) -const CL_KERNEL_ARG_ACCESS_NONE = cl_uint(0x11A3) - -# cl_kernel_arg_type_qualifer -const CL_KERNEL_ARG_TYPE_NONE = cl_bitfield(0) -const CL_KERNEL_ARG_TYPE_CONST = cl_bitfield(1 << 0) -const CL_KERNEL_ARG_TYPE_RESTRICT = cl_bitfield(1 << 1) -const CL_KERNEL_ARG_TYPE_VOLATILE = cl_bitfield(1 << 2) - -# cl_kernel_work_group_info -const CL_KERNEL_WORK_GROUP_SIZE = cl_uint(0x11B0) -const CL_KERNEL_COMPILE_WORK_GROUP_SIZE = cl_uint(0x11B1) -const CL_KERNEL_LOCAL_MEM_SIZE = cl_uint(0x11B2) -const CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE = cl_uint(0x11B3) -const CL_KERNEL_PRIVATE_MEM_SIZE = cl_uint(0x11B4) -const CL_KERNEL_GLOBAL_WORK_SIZE = 
cl_uint(0x11B5) - -# cl_kernel_exec_info -const CL_KERNEL_EXEC_INFO_SVM_PTRS = cl_uint(0x11B6) -const CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM = cl_uint(0x11B7) - -# cl_event_info -const CL_EVENT_COMMAND_QUEUE = cl_uint(0x11D0) -const CL_EVENT_COMMAND_TYPE = cl_uint(0x11D1) -const CL_EVENT_REFERENCE_COUNT = cl_uint(0x11D2) -const CL_EVENT_COMMAND_EXECUTION_STATUS = cl_uint(0x11D3) -const CL_EVENT_CONTEXT = cl_uint(0x11D4) - -# cl_command_type -const CL_COMMAND_NDRANGE_KERNEL = cl_uint(0x11F0) -const CL_COMMAND_TASK = cl_uint(0x11F1) -const CL_COMMAND_NATIVE_KERNEL = cl_uint(0x11F2) -const CL_COMMAND_READ_BUFFER = cl_uint(0x11F3) -const CL_COMMAND_WRITE_BUFFER = cl_uint(0x11F4) -const CL_COMMAND_COPY_BUFFER = cl_uint(0x11F5) -const CL_COMMAND_READ_IMAGE = cl_uint(0x11F6) -const CL_COMMAND_WRITE_IMAGE = cl_uint(0x11F7) -const CL_COMMAND_COPY_IMAGE = cl_uint(0x11F8) -const CL_COMMAND_COPY_IMAGE_TO_BUFFER = cl_uint(0x11F9) -const CL_COMMAND_COPY_BUFFER_TO_IMAGE = cl_uint(0x11FA) -const CL_COMMAND_MAP_BUFFER = cl_uint(0x11FB) -const CL_COMMAND_MAP_IMAGE = cl_uint(0x11FC) -const CL_COMMAND_UNMAP_MEM_OBJECT = cl_uint(0x11FD) -const CL_COMMAND_MARKER = cl_uint(0x11FE) -const CL_COMMAND_ACQUIRE_GL_OBJECTS = cl_uint(0x11FF) -const CL_COMMAND_RELEASE_GL_OBJECTS = cl_uint(0x1200) -const CL_COMMAND_READ_BUFFER_RECT = cl_uint(0x1201) -const CL_COMMAND_WRITE_BUFFER_RECT = cl_uint(0x1202) -const CL_COMMAND_COPY_BUFFER_RECT = cl_uint(0x1203) -const CL_COMMAND_USER = cl_uint(0x1204) -const CL_COMMAND_BARRIER = cl_uint(0x1205) -const CL_COMMAND_MIGRATE_MEM_OBJECTS = cl_uint(0x1206) -const CL_COMMAND_FILL_BUFFER = cl_uint(0x1207) -const CL_COMMAND_FILL_IMAGE = cl_uint(0x1208) -const CL_COMMAND_SVM_FREE = cl_uint(0x1209) -const CL_COMMAND_SVM_MEMCPY = cl_uint(0x120A) -const CL_COMMAND_SVM_MEMFILL = cl_uint(0x120B) -const CL_COMMAND_SVM_MAP = cl_uint(0x120C) -const CL_COMMAND_SVM_UNMAP = cl_uint(0x120D) - -# command execution status -const CL_COMPLETE = cl_uint(0x0) -const CL_RUNNING = 
cl_uint(0x1) -const CL_SUBMITTED = cl_uint(0x2) -const CL_QUEUED = cl_uint(0x3) - -# cl_buffer_create_type -const CL_BUFFER_CREATE_TYPE_REGION = cl_uint(0x1220) - -# cl_profiling_info -const CL_PROFILING_COMMAND_QUEUED = cl_uint(0x1280) -const CL_PROFILING_COMMAND_SUBMIT = cl_uint(0x1281) -const CL_PROFILING_COMMAND_START = cl_uint(0x1282) -const CL_PROFILING_COMMAND_END = cl_uint(0x1283) -const CL_PROFILING_COMMAND_COMPLETE = cl_uint(0x1284) - -# OpenCL OpenGL Constants - -# cl_gl_context_info -const CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR = cl_uint(0x2006) -const CL_DEVICES_FOR_GL_CONTEXT_KHR = cl_uint(0x2007) - -# Additional cl_context_properties -const CL_GL_CONTEXT_KHR = cl_uint(0x2008) -const CL_EGL_DISPLAY_KHR = cl_uint(0x2009) -const CL_GLX_DISPLAY_KHR = cl_uint(0x200A) -const CL_WGL_HDC_KHR = cl_uint(0x200B) -const CL_CGL_SHAREGROUP_KHR = cl_uint(0x200C) - -if Sys.isapple() -const CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE = cl_uint(0x10000000) -end diff --git a/src/context.jl b/src/context.jl index b4a98de9..b952a6cf 100644 --- a/src/context.jl +++ b/src/context.jl @@ -1,16 +1,16 @@ # OpenCL.Context -const _ctx_reference_count = Dict{CL_context, Int}() +const _ctx_reference_count = Dict{cl_context, Int}() -function create_jl_reference!(ctx_id::CL_context) +function create_jl_reference!(ctx_id::cl_context) if haskey(_ctx_reference_count, ctx_id) # for the first jl reference, we already have a refcount of 1 - @check api.clRetainContext(ctx_id) # increase internal refcount, if creating an additional reference + clRetainContext(ctx_id) # increase internal refcount, if creating an additional reference end refcount = get!(_ctx_reference_count, ctx_id, 0) _ctx_reference_count[ctx_id] = refcount + 1 return end -function free_jl_reference!(ctx_id::CL_context) +function free_jl_reference!(ctx_id::cl_context) if !haskey(_ctx_reference_count, ctx_id) error("Freeing unknown context") end @@ -26,12 +26,12 @@ function free_jl_reference!(ctx_id::CL_context) end mutable 
struct Context <: CLObject - id :: CL_context + id::cl_context # If created from ctx_id already, we need to increase the reference count # because then we give out multiple context references with multiple finalizers to the world # TODO should we make it in a way, that you can't overwrite it? - function Context(ctx_id::CL_context; retain = false) - retain && @check api.clRetainContext(ctx_id) + function Context(ctx_id::cl_context; retain = false) + retain && clRetainContext(ctx_id) if !is_ctx_id_alive(ctx_id) error("ctx_id not alive: ", ctx_id) end @@ -50,21 +50,21 @@ mutable struct Context <: CLObject end number_of_references(ctx::Context) = number_of_references(ctx.id) -function number_of_references(ctx_id::CL_context) - refcounts = Ref{CL_uint}() - @check api.clGetContextInfo( +function number_of_references(ctx_id::cl_context) + refcounts = Ref{Cuint}() + clGetContextInfo( ctx_id, CL_CONTEXT_REFERENCE_COUNT, - sizeof(CL_uint), refcounts, C_NULL + sizeof(Cuint), refcounts, C_NULL ) return refcounts[] end -function is_ctx_id_alive(ctx_id::CL_context) +function is_ctx_id_alive(ctx_id::cl_context) number_of_references(ctx_id) > 0 end -function release_ctx_id(ctx_id::CL_context) +function release_ctx_id(ctx_id::cl_context) if is_ctx_id_alive(ctx_id) - @check api.clReleaseContext(ctx_id) + clReleaseContext(ctx_id) else error("Double free for context: ", ctx_id) end @@ -82,10 +82,10 @@ function Base.show(io::IO, ctx::Context) end struct _CtxErr - handle :: Ptr{Nothing} - err_info :: Ptr{Cchar} - priv_info :: Ptr{Nothing} - cb :: Csize_t + handle::Ptr{Nothing} + err_info::Ptr{Cchar} + priv_info::Ptr{Nothing} + cb::Csize_t end const io_lock = ReentrantLock() @@ -129,15 +129,15 @@ function Context(devs::Vector{Device}; end n_devices = length(devs) - device_ids = Vector{CL_device_id}(undef, n_devices) + device_ids = Vector{cl_device_id}(undef, n_devices) for (i, d) in enumerate(devs) device_ids[i] = d.id end - err_code = Ref{CL_int}() + err_code = Ref{Cint}() payload = 
callback === nothing ? raise_context_error : callback f_ptr = @cfunction($payload, Nothing, (Ptr{Cchar}, Ptr{Nothing}, Csize_t)) - ctx_id = api.clCreateContext( + ctx_id = clCreateContext( ctx_properties, n_devices, device_ids, ctx_callback_ptr(), f_ptr, err_code) if err_code[] != CL_SUCCESS @@ -150,7 +150,7 @@ end Context(d::Device; properties=nothing, callback=nothing) = Context([d], properties=properties, callback=callback) -function Context(dev_type::CL_device_type; properties = nothing, callback = nothing) +function Context(dev_type; properties = nothing, callback = nothing) if properties !== nothing ctx_properties = _parse_properties(properties) else @@ -161,9 +161,9 @@ function Context(dev_type::CL_device_type; properties = nothing, callback = noth else ctx_user_data_cb = raise_context_error end - err_code = Ref{CL_int}() + err_code = Ref{Cint}() ctx_user_data = @cfunction($ctx_user_data_cb, Nothing, (Ptr{Cchar}, Ptr{Nothing}, Csize_t)) - ctx_id = api.clCreateContextFromType(ctx_properties, dev_type, + ctx_id = clCreateContextFromType(ctx_properties, dev_type, ctx_callback_ptr(), ctx_user_data, err_code) if err_code[] != CL_SUCCESS throw(CLError(err_code[])) @@ -178,19 +178,19 @@ function Context(dev_type::Symbol; end -function properties(ctx_id::CL_context) +function properties(ctx_id::cl_context) nbytes = Ref{Csize_t}(0) - @check api.clGetContextInfo(ctx_id, CL_CONTEXT_PROPERTIES, 0, C_NULL, nbytes) + clGetContextInfo(ctx_id, CL_CONTEXT_PROPERTIES, 0, C_NULL, nbytes) # Calculate length of storage array # At nbytes[] the size of the properties array in bytes is stored - # The length of the property array is then nbytes[] / sizeof(CL_context_properties) + # The length of the property array is then nbytes[] / sizeof(cl_context_properties) # Note: nprops should be odd since it requires a C_NULL terminated array - nprops = div(nbytes[], sizeof(CL_context_properties)) + nprops = div(nbytes[], sizeof(cl_context_properties)) - props = 
Vector{CL_context_properties}(undef, nprops) - @check api.clGetContextInfo(ctx_id, CL_CONTEXT_PROPERTIES, - nbytes[], props, C_NULL) + props = Vector{cl_context_properties}(undef, nprops) + clGetContextInfo(ctx_id, CL_CONTEXT_PROPERTIES, + nbytes[], props, C_NULL) #properties array of [key,value..., C_NULL] result = Any[] for i in 1:2:nprops @@ -205,8 +205,6 @@ function properties(ctx_id::CL_context) key == CL_WGL_HDC_KHR || key == CL_CGL_SHAREGROUP_KHR push!(result, (key, value)) - elseif Sys.isapple() ? (key == CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE) : false - push!(result, (key, value)) elseif key == 0 if i != nprops @warn("Encountered OpenCL.Context property key == 0 at position $i") @@ -228,7 +226,7 @@ function _parse_properties(props) if isempty(props) return C_NULL end - cl_props = CL_context_properties[] + cl_props = cl_context_properties[] for prop_tuple in props if length(prop_tuple) != 2 throw(ArgumentError("Context property tuples must be of type (key, value)")) @@ -256,9 +254,9 @@ function _parse_properties(props) end function num_devices(ctx::Context) - ndevices = Ref{CL_uint}() - @check api.clGetContextInfo(ctx.id, CL_CONTEXT_NUM_DEVICES, - sizeof(CL_uint), ndevices, C_NULL) + ndevices = Ref{Cuint}() + clGetContextInfo(ctx.id, CL_CONTEXT_NUM_DEVICES, + sizeof(Cuint), ndevices, C_NULL) return ndevices[] end @@ -267,9 +265,9 @@ function devices(ctx::Context) if n == 0 return [] end - dev_ids = Vector{CL_device_id}(undef, n) - @check api.clGetContextInfo(ctx.id, CL_CONTEXT_DEVICES, - n * sizeof(CL_device_id), dev_ids, C_NULL) + dev_ids = Vector{cl_device_id}(undef, n) + clGetContextInfo(ctx.id, CL_CONTEXT_DEVICES, + n * sizeof(cl_device_id), dev_ids, C_NULL) return [Device(id) for id in dev_ids] end @@ -283,7 +281,8 @@ function create_some_context() local ctx::Context try ctx = Context(dev) - catch + catch err + @warn "Could not create context for GPU device $dev" exception=(err,catch_backtrace()) continue end return ctx @@ -295,7 +294,8 @@ 
function create_some_context() local ctx::Context try ctx = Context(dev) - catch + catch err + @warn "Could not create context for CPU device $dev" exception=(err,catch_backtrace()) continue end return ctx diff --git a/src/device.jl b/src/device.jl index 8d35d242..743b19d9 100644 --- a/src/device.jl +++ b/src/device.jl @@ -1,7 +1,7 @@ # OpenCL.Device struct Device <: CLObject - id :: CL_device_id + id::cl_device_id end Base.pointer(d::Device) = d.id @@ -21,7 +21,7 @@ macro int_info(func, cl_device_info, return_type) quote function $(esc(func))(d::Device) result = Ref{$return_type}() - @check api.clGetDeviceInfo(d.id, $cl_device_info, + clGetDeviceInfo(d.id, $cl_device_info, sizeof($return_type), result, C_NULL) return result[] end @@ -32,61 +32,61 @@ function info(d::Device, s::Symbol) profile(d::Device) = begin size = Ref{Csize_t}() - @check api.clGetDeviceInfo(d.id, CL_DEVICE_PROFILE, 0, C_NULL, size) - result = Vector{CL_char}(undef, size[]) - @check api.clGetDeviceInfo(d.id, CL_DEVICE_PROFILE, size[], result, C_NULL) + clGetDeviceInfo(d.id, CL_DEVICE_PROFILE, 0, C_NULL, size) + result = Vector{Cchar}(undef, size[]) + clGetDeviceInfo(d.id, CL_DEVICE_PROFILE, size[], result, C_NULL) bs = CLString(result) return bs end version(d::Device) = begin size = Ref{Csize_t}() - @check api.clGetDeviceInfo(d.id, CL_DEVICE_VERSION, 0, C_NULL, size) - result = Vector{CL_char}(undef, size[]) - @check api.clGetDeviceInfo(d.id, CL_DEVICE_VERSION, size[], result, C_NULL) + clGetDeviceInfo(d.id, CL_DEVICE_VERSION, 0, C_NULL, size) + result = Vector{Cchar}(undef, size[]) + clGetDeviceInfo(d.id, CL_DEVICE_VERSION, size[], result, C_NULL) bs = CLString(result) return bs end driver_version(d::Device) = begin size = Ref{Csize_t}() - @check api.clGetDeviceInfo(d.id, CL_DRIVER_VERSION, 0, C_NULL, size) - result = Vector{CL_char}(undef, size[]) - @check api.clGetDeviceInfo(d.id, CL_DRIVER_VERSION, size[], result, C_NULL) + clGetDeviceInfo(d.id, CL_DRIVER_VERSION, 0, C_NULL, size) + result 
= Vector{Cchar}(undef, size[]) + clGetDeviceInfo(d.id, CL_DRIVER_VERSION, size[], result, C_NULL) bs = CLString(result) return string(replace(bs, r"\s+" => " ")) end extensions(d::Device) = begin size = Ref{Csize_t}() - @check api.clGetDeviceInfo(d.id, CL_DEVICE_EXTENSIONS, 0, C_NULL, size) - result = Vector{CL_char}(undef, size[]) - @check api.clGetDeviceInfo(d.id, CL_DEVICE_EXTENSIONS, size[], result, C_NULL) + clGetDeviceInfo(d.id, CL_DEVICE_EXTENSIONS, 0, C_NULL, size) + result = Vector{Cchar}(undef, size[]) + clGetDeviceInfo(d.id, CL_DEVICE_EXTENSIONS, size[], result, C_NULL) bs = CLString(result) return String[string(s) for s in split(bs)] end platform(d::Device) = begin - result = Ref{CL_platform_id}() - @check api.clGetDeviceInfo(d.id, CL_DEVICE_PLATFORM, - sizeof(CL_platform_id), result, C_NULL) + result = Ref{cl_platform_id}() + clGetDeviceInfo(d.id, CL_DEVICE_PLATFORM, + sizeof(cl_platform_id), result, C_NULL) return Platform(result[]) end name(d::Device) = begin size = Ref{Csize_t}() - @check api.clGetDeviceInfo(d.id, CL_DEVICE_NAME, 0, C_NULL, size) - result = Vector{CL_char}(undef, size[]) - @check api.clGetDeviceInfo(d.id, CL_DEVICE_NAME, - size[] * sizeof(CL_char), result, C_NULL) + clGetDeviceInfo(d.id, CL_DEVICE_NAME, 0, C_NULL, size) + result = Vector{Cchar}(undef, size[]) + clGetDeviceInfo(d.id, CL_DEVICE_NAME, + size[] * sizeof(Cchar), result, C_NULL) n = CLString(result) return string(replace(n, r"\s+" => " ")) end device_type(d::Device) = begin - result = Ref{CL_device_type}() - @check api.clGetDeviceInfo(d.id, CL_DEVICE_TYPE, - sizeof(CL_device_type), result, C_NULL) + result = Ref{cl_device_type}() + clGetDeviceInfo(d.id, CL_DEVICE_TYPE, + sizeof(cl_device_type), result, C_NULL) result = result[] if result == CL_DEVICE_TYPE_GPU return :gpu @@ -102,13 +102,13 @@ function info(d::Device, s::Symbol) end has_image_support(d::Device) = begin - has_support = Ref{CL_bool}(CL_FALSE) - @check api.clGetDeviceInfo(d.id, CL_DEVICE_IMAGE_SUPPORT, - 
sizeof(CL_bool), has_support, C_NULL) + has_support = Ref{cl_bool}(CL_FALSE) + clGetDeviceInfo(d.id, CL_DEVICE_IMAGE_SUPPORT, + sizeof(cl_bool), has_support, C_NULL) return has_support[] == CL_TRUE end - @int_info(queue_properties, CL_DEVICE_QUEUE_PROPERTIES, CL_command_queue_properties) + @int_info(queue_properties, CL_DEVICE_QUEUE_PROPERTIES, cl_command_queue_properties) has_queue_out_of_order_exec(d::Device) = (queue_properties(d) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0 @@ -117,58 +117,58 @@ function info(d::Device, s::Symbol) (queue_properties(d) & CL_QUEUE_PROFILING_ENABLE) != 0 has_native_kernel(d::Device) = begin - result = Ref{CL_device_exec_capabilities}() - @check api.clGetDeviceInfo(d.id, CL_DEVICE_EXECUTION_CAPABILITIES, - sizeof(CL_device_exec_capabilities), result, C_NULL) + result = Ref{cl_device_exec_capabilities}() + clGetDeviceInfo(d.id, CL_DEVICE_EXECUTION_CAPABILITIES, + sizeof(cl_device_exec_capabilities), result, C_NULL) return (result[] & CL_EXEC_NATIVE_KERNEL) != 0 end - @int_info(vendor_id, CL_DEVICE_VENDOR_ID, CL_uint) - @int_info(max_compute_units, CL_DEVICE_MAX_COMPUTE_UNITS, CL_uint) - @int_info(max_work_item_dims, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, CL_uint) - @int_info(max_clock_frequency, CL_DEVICE_MAX_CLOCK_FREQUENCY, CL_uint) - @int_info(address_bits, CL_DEVICE_ADDRESS_BITS, CL_uint) - @int_info(max_read_image_args, CL_DEVICE_MAX_READ_IMAGE_ARGS, CL_uint) - @int_info(max_write_image_args, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, CL_uint) - @int_info(global_mem_size, CL_DEVICE_GLOBAL_MEM_SIZE, CL_ulong) - @int_info(max_mem_alloc_size, CL_DEVICE_MAX_MEM_ALLOC_SIZE, CL_ulong) - @int_info(max_const_buffer_size, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, CL_ulong) - @int_info(local_mem_size, CL_DEVICE_LOCAL_MEM_SIZE, CL_ulong) + @int_info(vendor_id, CL_DEVICE_VENDOR_ID, Cuint) + @int_info(max_compute_units, CL_DEVICE_MAX_COMPUTE_UNITS, Cuint) + @int_info(max_work_item_dims, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, Cuint) + 
@int_info(max_clock_frequency, CL_DEVICE_MAX_CLOCK_FREQUENCY, Cuint) + @int_info(address_bits, CL_DEVICE_ADDRESS_BITS, Cuint) + @int_info(max_read_image_args, CL_DEVICE_MAX_READ_IMAGE_ARGS, Cuint) + @int_info(max_write_image_args, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, Cuint) + @int_info(global_mem_size, CL_DEVICE_GLOBAL_MEM_SIZE, Culong) + @int_info(max_mem_alloc_size, CL_DEVICE_MAX_MEM_ALLOC_SIZE, Culong) + @int_info(max_const_buffer_size, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, Culong) + @int_info(local_mem_size, CL_DEVICE_LOCAL_MEM_SIZE, Culong) has_local_mem(d::Device) = begin - result = Ref{CL_device_local_mem_type}() - @check api.clGetDeviceInfo(d.id, CL_DEVICE_LOCAL_MEM_TYPE, - sizeof(CL_device_local_mem_type), result, C_NULL) + result = Ref{cl_device_local_mem_type}() + clGetDeviceInfo(d.id, CL_DEVICE_LOCAL_MEM_TYPE, + sizeof(cl_device_local_mem_type), result, C_NULL) return result[] == CL_LOCAL end host_unified_memory(d::Device) = begin - result = Ref{CL_bool}(CL_FALSE) - @check api.clGetDeviceInfo(d.id, CL_DEVICE_HOST_UNIFIED_MEMORY, - sizeof(CL_bool), result, C_NULL) + result = Ref{cl_bool}(CL_FALSE) + clGetDeviceInfo(d.id, CL_DEVICE_HOST_UNIFIED_MEMORY, + sizeof(cl_bool), result, C_NULL) return result[] == CL_TRUE end available(d::Device) = begin - result = Ref{CL_bool}(CL_FALSE) - @check api.clGetDeviceInfo(d.id, CL_DEVICE_AVAILABLE, - sizeof(CL_bool), result, C_NULL) + result = Ref{cl_bool}(CL_FALSE) + clGetDeviceInfo(d.id, CL_DEVICE_AVAILABLE, + sizeof(cl_bool), result, C_NULL) return result[] == CL_TRUE end compiler_available(d::Device) = begin - result = Ref{CL_bool}(CL_FALSE) - @check api.clGetDeviceInfo(d.id, CL_DEVICE_COMPILER_AVAILABLE, - sizeof(CL_bool), result, C_NULL) + result = Ref{cl_bool}(CL_FALSE) + clGetDeviceInfo(d.id, CL_DEVICE_COMPILER_AVAILABLE, + sizeof(cl_bool), result, C_NULL) return result[] == CL_TRUE end max_work_item_size(d::Device) = begin - dims = Ref{CL_uint}() - @check api.clGetDeviceInfo(d.id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, - 
sizeof(CL_uint), dims, C_NULL) + dims = Ref{Cuint}() + clGetDeviceInfo(d.id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, + sizeof(Cuint), dims, C_NULL) result = Vector{Csize_t}(undef, dims[]) - @check api.clGetDeviceInfo(d.id, CL_DEVICE_MAX_WORK_ITEM_SIZES, + clGetDeviceInfo(d.id, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(Csize_t) * dims[], result, C_NULL) return tuple([Int(r) for r in result]...) end @@ -180,9 +180,9 @@ function info(d::Device, s::Symbol) max_image2d_shape(d::Device) = begin width = Ref{Csize_t}() height = Ref{Csize_t}() - @check api.clGetDeviceInfo(d.id, CL_DEVICE_IMAGE2D_MAX_WIDTH, + clGetDeviceInfo(d.id, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(Csize_t), width, C_NULL) - @check api.clGetDeviceInfo(d.id, CL_DEVICE_IMAGE2D_MAX_HEIGHT, + clGetDeviceInfo(d.id, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(Csize_t), height, C_NULL) return (width[], height[]) end @@ -191,11 +191,11 @@ function info(d::Device, s::Symbol) width = Ref{Csize_t}() height = Ref{Csize_t}() depth = Ref{Csize_t}() - @check api.clGetDeviceInfo(d.id, CL_DEVICE_IMAGE3D_MAX_WIDTH, + clGetDeviceInfo(d.id, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(Csize_t), width, C_NULL) - @check api.clGetDeviceInfo(d.id, CL_DEVICE_IMAGE3D_MAX_HEIGHT, + clGetDeviceInfo(d.id, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(Csize_t), height, C_NULL) - @check api.clGetDeviceInfo(d.id, CL_DEVICE_IMAGE3D_MAX_DEPTH, + clGetDeviceInfo(d.id, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(Csize_t), depth, C_NULL) return (width[], height[], depth[]) end diff --git a/src/error.jl b/src/error.jl index 9fcb517f..2fb2e242 100644 --- a/src/error.jl +++ b/src/error.jl @@ -176,7 +176,7 @@ end Base.show(io::IO, err::OpenCLException) = Base.print(io, "OpenCL Error: $(err.msg)") struct CLError <: Exception - code::CL_int + code::Cint desc::Symbol function CLError(c::Integer) diff --git a/src/event.jl b/src/event.jl index e3856d9d..e1c175fc 100644 --- a/src/event.jl +++ b/src/event.jl @@ -3,11 +3,11 @@ abstract type CLEvent <: CLObject end mutable struct Event <: CLEvent 
- id :: CL_event + id::cl_event - function Event(evt_id::CL_event; retain=false) + function Event(evt_id; retain=false) if retain - @check api.clRetainEvent(evt_id) + clRetainEvent(evt_id) end evt = new(evt_id) finalizer(_finalize, evt) @@ -17,12 +17,12 @@ end # wait for completion before running finalizer mutable struct NannyEvent <: CLEvent - id::CL_event + id::cl_event obj::Any - function NannyEvent(evt_id::CL_event, obj::Any; retain=false) + function NannyEvent(evt_id, obj; retain=false) if retain - @check api.clRetainEvent(evt_id) + clRetainEvent(evt_id) end nanny_evt = new(evt_id, obj) finalizer(nanny_evt) do x @@ -36,12 +36,12 @@ end function _finalize(evt::CLEvent) if evt.id != C_NULL - @check api.clReleaseEvent(evt.id) + clReleaseEvent(evt.id) evt.id = C_NULL end end -NannyEvent(evt::Event, obj::Any; retain=false) = NannyEvent(evt.id, obj, retain=retain) +NannyEvent(evt::Event, obj; retain=false) = NannyEvent(evt.id, obj, retain=retain) Base.pointer(evt::CLEvent) = evt.id @@ -56,11 +56,11 @@ Base.getindex(evt::CLEvent, evt_info::Symbol) = info(evt, evt_info) @ocl_v1_1_only begin mutable struct UserEvent <: CLEvent - id :: CL_event + id::cl_event - function UserEvent(evt_id::CL_event, retain=false) + function UserEvent(evt_id::cl_event, retain=false) if retain - @check api.clRetainEvent(evt_id) + clRetainEvent(evt_id) end evt = new(evt_id) finalizer(_finalize, evt) @@ -69,15 +69,15 @@ Base.getindex(evt::CLEvent, evt_info::Symbol) = info(evt, evt_info) end function UserEvent(ctx::Context; retain=false) - status = Ref{CL_int}() - evt_id = api.clCreateUserEvent(ctx.id, status) + status = Ref{Cint}() + evt_id = clCreateUserEvent(ctx.id, status) if status[] != CL_SUCCESS throw(CLError(status[])) end try return UserEvent(evt_id, retain) catch err - @check api.clReleaseEvent(evt_id) + clReleaseEvent(evt_id) throw(err) end end @@ -89,18 +89,18 @@ Base.getindex(evt::CLEvent, evt_info::Symbol) = info(evt, evt_info) end function complete(evt::UserEvent) - @check 
api.clSetUserEventStatus(evt.id, CL_COMPLETE) + clSetUserEventStatus(evt.id, CL_COMPLETE) return evt end end struct _EventCB - handle :: Ptr{Nothing} - evt_id :: CL_event - status :: CL_int + handle::Ptr{Nothing} + evt_id::cl_event + status::Cint end -function event_notify(evt_id::CL_event, status::CL_int, payload::Ptr{Nothing}) +function event_notify(evt_id::cl_event, status::Cint, payload::Ptr{Nothing}) ptr = convert(Ptr{_EventCB}, payload) handle = unsafe_load(ptr).handle @@ -114,7 +114,7 @@ end function add_callback(evt::CLEvent, callback::Function) event_notify_ptr = @cfunction(event_notify, Nothing, - (CL_event, CL_int, Ptr{Cvoid})) + (cl_event, Cint, Ptr{Cvoid})) # The uv_callback is going to notify a task that, # then executes the real callback. @@ -125,7 +125,7 @@ function add_callback(evt::CLEvent, callback::Function) # isbits && isimmutable r_ecb = Ref(_EventCB(Base.unsafe_convert(Ptr{Cvoid}, cb), 0, 0)) - @check api.clSetEventCallback(evt.id, CL_COMPLETE, event_notify_ptr, r_ecb) + clSetEventCallback(evt.id, CL_COMPLETE, event_notify_ptr, r_ecb) @async begin try @@ -143,7 +143,7 @@ end function wait(evt::CLEvent) evt_id = [evt.id] - @check api.clWaitForEvents(cl_uint(1), pointer(evt_id)) + clWaitForEvents(cl_uint(1), pointer(evt_id)) return evt end @@ -151,7 +151,7 @@ function wait(evts::Vector{CLEvent}) evt_ids = [evt.id for evt in evts] if !isempty(evt_ids) nevents = cl_uint(length(evt_ids)) - @check api.clWaitForEvents(nevents, pointer(evt_ids)) + clWaitForEvents(nevents, pointer(evt_ids)) end return evts end @@ -161,8 +161,8 @@ end wait_for::Vector{CLEvent}) n_wait_events = cl_uint(length(wait_for)) wait_evt_ids = [evt.id for evt in wait_for] - ret_evt = Ref{CL_event}() - @check api.clEnqueueMarkerWithWaitList(q.id, n_wait_events, + ret_evt = Ref{cl_event}() + clEnqueueMarkerWithWaitList(q.id, n_wait_events, isempty(wait_evt_ids) ? 
C_NULL : wait_evt_ids, ret_evt) @return_event ret_evt[] @@ -172,8 +172,8 @@ end wait_for::Vector{CLEvent}) n_wait_events = cl_uint(length(wait_for)) wait_evt_ids = [evt.id for evt in wait_for] - ret_evt = Ref{CL_event}() - @check api.clEnqueueBarrierWithWaitList(q.id, n_wait_events, + ret_evt = Ref{cl_event}() + clEnqueueBarrierWithWaitList(q.id, n_wait_events, isempty(wait_evt_ids) ? C_NULL : wait_evt_ids, ret_evt) @return_event ret_evt[] @@ -181,8 +181,8 @@ end end function enqueue_marker(q::CmdQueue) - evt = Ref{CL_event}() - @check api.clEnqueueMarker(q.id, evt) + evt = Ref{cl_event}() + clEnqueueMarker(q.id, evt) @return_event evt[] end @deprecate enqueue_marker enqueue_marker_with_wait_list @@ -190,7 +190,7 @@ end function enqueue_wait_for_events(q::CmdQueue, wait_for::Vector{T}) where {T<:CLEvent} n_wait_events = cl_uint(length(wait_for)) wait_evt_ids = [evt.id for evt in wait_for] - @check api.clEnqueueWaitForEvents(q.id, n_wait_events, + clEnqueueWaitForEvents(q.id, n_wait_events, isempty(wait_evt_ids) ? 
C_NULL : pointer(wait_evt_ids)) end @@ -199,7 +199,7 @@ function enqueue_wait_for_events(q::CmdQueue, wait_for::CLEvent) end function enqueue_barrier(q::CmdQueue) - @check api.clEnqueueBarrier(q.id) + clEnqueueBarrier(q.id) return q end @deprecate enqueue_barrier enqueue_barrier_with_wait_list @@ -221,19 +221,19 @@ end macro profile_info(func, profile_info) quote function $(esc(func))(evt::CLEvent) - time = Ref{CL_long}(0) - err_code = api.clGetEventProfilingInfo(evt.id, $(esc(profile_info)), - sizeof(CL_ulong), time, C_NULL) - if err_code != CL_SUCCESS - if err_code == CL_PROFILING_INFO_NOT_AVAILABLE - if evt[:status] != :complete - #TODO: evt must have status complete before it can be profiled - throw(CLError(err_code)) - else - #TODO: queue must be created with :profile argument - throw(CLError(err_code)) - end + time = Ref{Clong}(0) + err_code = unchecked_clGetEventProfilingInfo(evt.id, $(esc(profile_info)), + sizeof(Culong), time, C_NULL) + if err_code == CL_PROFILING_INFO_NOT_AVAILABLE + if evt[:status] != :complete + #TODO: evt must have status complete before it can be profiled + throw(CLError(err_code)) + else + #TODO: queue must be created with :profile argument + throw(CLError(err_code)) end + end + if err_code != CL_SUCCESS throw(CLError(err_code)) end return time[] @@ -243,37 +243,37 @@ end function info(evt::CLEvent, evt_info::Symbol) command_queue(evt::CLEvent) = begin - cmd_q = Ref{CL_command_queue}() - @check api.clGetEventInfo(evt.id, CL_EVENT_COMMAND_QUEUE, - sizeof(CL_command_queue), cmd_q, C_NULL) + cmd_q = Ref{cl_command_queue}() + clGetEventInfo(evt.id, CL_EVENT_COMMAND_QUEUE, + sizeof(cl_command_queue), cmd_q, C_NULL) return CmdQueue(cmd_q[]) end command_type(evt::CLEvent) = begin - cmd_t = Ref{CL_int}() - @check api.clGetEventInfo(evt.id, CL_EVENT_COMMAND_TYPE, - sizeof(CL_int), cmd_t, C_NULL) + cmd_t = Ref{Cint}() + clGetEventInfo(evt.id, CL_EVENT_COMMAND_TYPE, + sizeof(Cint), cmd_t, C_NULL) return cmd_t[] end reference_count(evt::CLEvent) 
= begin - cnt = Ref{CL_uint}() - @check api.clGetEventInfo(evt.id, CL_EVENT_REFERENCE_COUNT, - sizeof(CL_uint), cnt, C_NULL) + cnt = Ref{Cuint}() + clGetEventInfo(evt.id, CL_EVENT_REFERENCE_COUNT, + sizeof(Cuint), cnt, C_NULL) return cnt[] end context(evt::CLEvent) = begin - ctx = Ref{CL_context}() - @check api.clGetEventInfo(evt.id, CL_EVENT_CONTEXT, - sizeof(CL_context), CL_context, C_NULL) + ctx = Ref{cl_context}() + clGetEventInfo(evt.id, CL_EVENT_CONTEXT, + sizeof(cl_context), cl_context, C_NULL) Context(ctx[]) end status(evt::CLEvent) = begin - st = Ref{CL_int}() - @check api.clGetEventInfo(evt.id, CL_EVENT_COMMAND_EXECUTION_STATUS, - sizeof(CL_int), st, C_NULL) + st = Ref{Cint}() + clGetEventInfo(evt.id, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(Cint), st, C_NULL) status = st[] if status == CL_QUEUED return :queued diff --git a/src/kernel.jl b/src/kernel.jl index 1d4c2a91..94105c00 100644 --- a/src/kernel.jl +++ b/src/kernel.jl @@ -1,10 +1,10 @@ # OpenCL.Kernel mutable struct Kernel <: CLObject - id :: CL_kernel + id::cl_kernel - function Kernel(k::CL_kernel, retain=false) + function Kernel(k::cl_kernel, retain=false) if retain - @check api.clRetainKernel(k) + clRetainKernel(k) end kernel = new(k) finalizer(_finalize, kernel) @@ -14,7 +14,7 @@ end function _finalize(k::Kernel) if k.id != C_NULL - @check api.clReleaseKernel(k.id) + clReleaseKernel(k.id) k.id = C_NULL end end @@ -34,8 +34,8 @@ function Kernel(p::Program, kernel_name::String) throw(ArgumentError(msg)) end end - err_code = Ref{CL_int}() - kernel_id = api.clCreateKernel(p.id, kernel_name, err_code) + err_code = Ref{Cint}() + kernel_id = clCreateKernel(p.id, kernel_name, err_code) if err_code[] != CL_SUCCESS throw(CLError(err_code[])) end @@ -59,7 +59,7 @@ Base.length(l::LocalMem{T}) where {T} = Int(l.nbytes ÷ sizeof(T)) function set_arg!(k::Kernel, idx::Integer, arg::Nothing) @assert idx > 0 - @check api.clSetKernelArg(k.id, cl_uint(idx-1), sizeof(CL_mem), C_NULL) + clSetKernelArg(k.id, 
cl_uint(idx-1), sizeof(cl_mem), C_NULL) return k end @@ -73,13 +73,13 @@ end function set_arg!(k::Kernel, idx::Integer, arg::CLMemObject) @assert idx > 0 arg_boxed = Ref{typeof(arg.id)}(arg.id) - @check api.clSetKernelArg(k.id, cl_uint(idx-1), sizeof(CL_mem), arg_boxed) + clSetKernelArg(k.id, cl_uint(idx-1), sizeof(cl_mem), arg_boxed) return k end function set_arg!(k::Kernel, idx::Integer, arg::LocalMem) @assert idx > 0 "Kernel idx must be bigger 0" - @check api.clSetKernelArg(k.id, cl_uint(idx-1), arg.nbytes, C_NULL) + clSetKernelArg(k.id, cl_uint(idx-1), arg.nbytes, C_NULL) return k end @@ -181,7 +181,7 @@ end function set_arg!(k::Kernel, idx::Integer, arg::T) where T @assert idx > 0 "Kernel idx must be bigger 0" ref, tsize = to_cl_ref(arg) - err = api.clSetKernelArg(k.id, cl_uint(idx - 1), tsize, ref) + err = unchecked_clSetKernelArg(k.id, cl_uint(idx - 1), tsize, ref) if err == CL_INVALID_ARG_SIZE error(""" Julia and OpenCL type don't match at kernel argument $idx: Found $T. @@ -207,7 +207,9 @@ function set_arg!(k::Kernel, idx::Integer, arg::T) where T You can use `c.datatype_align(T)` to figure out the alignment of a Julia type! """) end - @check err + if err != CL_SUCCESS + throw(CLError(err)) + end return k end @@ -217,12 +219,12 @@ function set_args!(k::Kernel, args...) end end -function work_group_info(k::Kernel, winfo::CL_kernel_work_group_info, d::Device) +function work_group_info(k::Kernel, winfo, d::Device) if (winfo == CL_KERNEL_LOCAL_MEM_SIZE || winfo == CL_KERNEL_PRIVATE_MEM_SIZE) - result1 = Ref{CL_ulong}(0) - @check api.clGetKernelWorkGroupInfo(k.id, d.id, winfo, - sizeof(CL_ulong), result1, C_NULL) + result1 = Ref{Culong}(0) + clGetKernelWorkGroupInfo(k.id, d.id, winfo, + sizeof(Culong), result1, C_NULL) return Int(result1[]) elseif winfo == CL_KERNEL_COMPILE_WORK_GROUP_SIZE # Intel driver has a bug so we can't query the required size. 
@@ -230,12 +232,12 @@ function work_group_info(k::Kernel, winfo::CL_kernel_work_group_info, d::Device) # [1] https://www.khronos.org/registry/OpenCL/sdk/1.2/docs/man/xhtml/clGetKernelWorkGroupInfo.html @assert sizeof(Csize_t) == sizeof(Int) result2 = Vector{Int}(undef, 3) - @check api.clGetKernelWorkGroupInfo(k.id, d.id, winfo, 3*sizeof(Int), result2, C_NULL) + clGetKernelWorkGroupInfo(k.id, d.id, winfo, 3*sizeof(Int), result2, C_NULL) return result2 else result = Ref{Csize_t}(0) - @check api.clGetKernelWorkGroupInfo(k.id, d.id, winfo, - sizeof(CL_ulong), result, C_NULL) + clGetKernelWorkGroupInfo(k.id, d.id, winfo, + sizeof(Culong), result, C_NULL) return Int(result[]) end end @@ -348,8 +350,8 @@ function enqueue_kernel(q::CmdQueue, wait_event_ids = C_NULL end - ret_event = Ref{CL_event}() - @check api.clEnqueueNDRangeKernel(q.id, k.id, cl_uint(work_dim), goffset, gsize, lsize, + ret_event = Ref{cl_event}() + clEnqueueNDRangeKernel(q.id, k.id, cl_uint(work_dim), goffset, gsize, lsize, n_events, wait_event_ids, ret_event) return Event(ret_event[], retain=false) end @@ -369,54 +371,54 @@ function enqueue_task(q::CmdQueue, k::Kernel; wait_for=nothing) evt_ids = [evt.id for evt in wait_for] end end - ret_event = Ref{CL_event}() - @check api.clEnqueueTask(q.id, k.id, n_evts, evt_ids, ret_event) + ret_event = Ref{cl_event}() + clEnqueueTask(q.id, k.id, n_evts, evt_ids, ret_event) return ret_event[] end function info(k::Kernel, kinfo::Symbol) name(k::Kernel) = begin size = Ref{Csize_t}() - @check api.clGetKernelInfo(k.id, CL_KERNEL_FUNCTION_NAME, + clGetKernelInfo(k.id, CL_KERNEL_FUNCTION_NAME, 0, C_NULL, size) result = Vector{Cchar}(undef, size[]) - @check api.clGetKernelInfo(k.id, CL_KERNEL_FUNCTION_NAME, + clGetKernelInfo(k.id, CL_KERNEL_FUNCTION_NAME, size[], result, size) return CLString(result) end num_args(k::Kernel) = begin - ret = Ref{CL_uint}() - @check api.clGetKernelInfo(k.id, CL_KERNEL_NUM_ARGS, - sizeof(CL_uint), ret, C_NULL) + ret = Ref{Cuint}() + 
clGetKernelInfo(k.id, CL_KERNEL_NUM_ARGS, + sizeof(Cuint), ret, C_NULL) return ret[] end reference_count(k::Kernel) = begin - ret = Ref{CL_uint}() - @check api.clGetKernelInfo(k.id, CL_KERNEL_REFERENCE_COUNT, - sizeof(CL_uint), ret, C_NULL) + ret = Ref{Cuint}() + clGetKernelInfo(k.id, CL_KERNEL_REFERENCE_COUNT, + sizeof(Cuint), ret, C_NULL) return ret[] end program(k::Kernel) = begin - ret = Ref{CL_program}() - @check api.clGetKernelInfo(k.id, CL_KERNEL_PROGRAM, - sizeof(CL_program), ret, C_NULL) + ret = Ref{cl_program}() + clGetKernelInfo(k.id, CL_KERNEL_PROGRAM, + sizeof(cl_program), ret, C_NULL) return Program(ret[], retain=true) end # Only supported for version 1.2 and above attributes(k::Kernel) = begin size = Ref{Csize_t}() - rcode = api.clGetKernelInfo(k.id, CL_KERNEL_ATTRIBUTES, - 0, C_NULL, size) + rcode = unchecked_clGetKernelInfo(k.id, CL_KERNEL_ATTRIBUTES, + 0, C_NULL, size) # Version 1.1 mostly MESA drivers will pass through the below condition if rcode == CL_INVALID_VALUE || size[] <= 1 return "" end - result = Vector{CL_char}(undef, size[]) - @check api.clGetKernelInfo(k.id, CL_KERNEL_ATTRIBUTES, + result = Vector{Cchar}(undef, size[]) + clGetKernelInfo(k.id, CL_KERNEL_ATTRIBUTES, size[], result, size) return CLString(result) end diff --git a/src/macros.jl b/src/macros.jl index 4cb814d6..01979eb2 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -1,24 +1,3 @@ -macro check(clfunc) - quote - local err::CL_int - err = $(esc(clfunc)) - if err != CL_SUCCESS - throw(CLError(err)) - end - err - end -end - -macro check_release(clfunc) - quote - local err::CL_int - err = $(esc(clfunc)) - if err != CL_SUCCESS - error("release! 
$($(string(clfunc))) failed with code $err.") - end - end -end - #TODO: these are just stubs for future expanded versions macro ocl_v1_1_only(ex) quote @@ -38,7 +17,7 @@ macro return_event(evt) try return Event(evt, retain=false) catch err - @check api.clReleaseEvent(evt) + clReleaseEvent(evt) throw(err) end end @@ -50,7 +29,7 @@ macro return_nanny_event(evt, obj) try return NannyEvent(evt, $(esc(obj))) catch err - @check api.clReleaseEvent(evt) + clReleaseEvent(evt) throw(err) end end diff --git a/src/memory.jl b/src/memory.jl index a1ce0af3..52fc9dbe 100644 --- a/src/memory.jl +++ b/src/memory.jl @@ -5,7 +5,7 @@ abstract type CLMemObject <: CLObject end #This should be implemented by all subtypes # type CLMemType <: CLMemObject # valid::Bool -# id::CL_mem +# id::cl_mem # ... # end @@ -13,7 +13,7 @@ Base.pointer(mem::CLMemObject) = mem.id Base.sizeof(mem::CLMemObject) = begin val = Ref{Csize_t}(0) - @check api.clGetMemObjectInfo(mem.id, CL_MEM_SIZE, sizeof(Csize_t), + clGetMemObjectInfo(mem.id, CL_MEM_SIZE, sizeof(Csize_t), val, C_NULL) return val[] end @@ -23,15 +23,15 @@ function _finalize(mem::CLMemObject) throw(CLMemoryError("attempted to double free mem object $mem")) end if mem.id != C_NULL - @check_release api.clReleaseMemObject(mem.id) + clReleaseMemObject(mem.id) mem.id = C_NULL end mem.valid = false end context(mem::CLMemObject) = begin - param = Ref{CL_context}() - @check api.clGetMemObjectInfo(mem.id, CL_MEM_CONTEXT, + param = Ref{cl_context}() + clGetMemObjectInfo(mem.id, CL_MEM_CONTEXT, sizeof(Csize_t), param, C_NULL) return Context(param[], retain=true) end @@ -39,16 +39,16 @@ end function info(mem::CLMemObject, minfo::Symbol) mem_type(m::CLMemObject) = begin - result = Ref{CL_mem_object_type}() - @check api.clGetMemObjectInfo(m.id, CL_MEM_TYPE, - sizeof(CL_mem_object_type), result, C_NULL) + result = Ref{cl_mem_object_type}() + clGetMemObjectInfo(m.id, CL_MEM_TYPE, + sizeof(cl_mem_object_type), result, C_NULL) return result[] end 
mem_flags(m::CLMemObject) = begin - result = Ref{CL_mem_flags}() - @check api.clGetMemObjectInfo(m.id, CL_MEM_FLAGS, - sizeof(CL_mem_flags), result, C_NULL) + result = Ref{cl_mem_flags}() + clGetMemObjectInfo(m.id, CL_MEM_FLAGS, + sizeof(cl_mem_flags), result, C_NULL) mf = result[] flags = Symbol[] if (mf & CL_MEM_READ_WRITE) != 0 @@ -74,22 +74,22 @@ function info(mem::CLMemObject, minfo::Symbol) size(m::CLMemObject) = begin result = Ref{Csize_t}() - @check api.clGetMemObjectInfo(m.id, CL_MEM_SIZE, + clGetMemObjectInfo(m.id, CL_MEM_SIZE, sizeof(Csize_t), result, C_NULL) return result[] end reference_count(m::CLMemObject) = begin - result = Ref{CL_uint}() - @check api.clGetMemObjectInfo(m.id, CL_MEM_REFERENCE_COUNT, - sizeof(CL_uint), result, C_NULL) + result = Ref{Cuint}() + clGetMemObjectInfo(m.id, CL_MEM_REFERENCE_COUNT, + sizeof(Cuint), result, C_NULL) return result[] end map_count(m::CLMemObject) = begin - result = Ref{CL_uint}() - @check api.clGetMemObjectInfo(m.id, CL_MEM_MAP_COUNT, - sizeof(CL_uint), result, C_NULL) + result = Ref{Cuint}() + clGetMemObjectInfo(m.id, CL_MEM_MAP_COUNT, + sizeof(Cuint), result, C_NULL) return result[] end diff --git a/src/platform.jl b/src/platform.jl index 75e2eebf..435dfb5a 100644 --- a/src/platform.jl +++ b/src/platform.jl @@ -1,13 +1,13 @@ # OpenCL.Platform struct Platform <: CLObject - id::CL_platform_id + id::cl_platform_id end Base.pointer(p::Platform) = p.id function info(p::Platform, pinfo::Symbol) - info_map = Dict{Symbol, CL_platform_info}( + info_map = Dict{Symbol, cl_platform_info}( :profile => CL_PLATFORM_PROFILE, :version => CL_PLATFORM_VERSION, :name => CL_PLATFORM_NAME, @@ -39,36 +39,36 @@ function Base.show(io::IO, p::Platform) end function platforms() - nplatforms = Ref{CL_uint}() - @check api.clGetPlatformIDs(0, C_NULL, nplatforms) - cl_platform_ids = Vector{CL_platform_id}(undef, nplatforms[]) - @check api.clGetPlatformIDs(nplatforms[], cl_platform_ids, C_NULL) + nplatforms = Ref{Cuint}() + 
clGetPlatformIDs(0, C_NULL, nplatforms) + cl_platform_ids = Vector{cl_platform_id}(undef, nplatforms[]) + clGetPlatformIDs(nplatforms[], cl_platform_ids, C_NULL) return [Platform(id) for id in cl_platform_ids] end function num_platforms() - nplatforms = Ref{CL_uint}() - @check api.clGetPlatformIDs(0, C_NULL, nplatforms) + nplatforms = Ref{Cuint}() + clGetPlatformIDs(0, C_NULL, nplatforms) return Int(nplatforms[]) end -function info(p::Platform, pinfo::CL_platform_info) +function info(p::Platform, pinfo) size = Ref{Csize_t}() - @check api.clGetPlatformInfo(p.id, pinfo, 0, C_NULL, size) - result = Vector{CL_char}(undef, size[]) - @check api.clGetPlatformInfo(p.id, pinfo, size[], result, C_NULL) + clGetPlatformInfo(p.id, pinfo, 0, C_NULL, size) + result = Vector{Cchar}(undef, size[]) + clGetPlatformInfo(p.id, pinfo, size[], result, C_NULL) return CLString(result) end -function devices(p::Platform, dtype::CL_device_type) +function devices(p::Platform, dtype) try - ndevices = Ref{CL_uint}() - @check api.clGetDeviceIDs(p.id, dtype, 0, C_NULL, ndevices) + ndevices = Ref{Cuint}() + clGetDeviceIDs(p.id, dtype, 0, C_NULL, ndevices) if ndevices[] == 0 return Device[] end - result = Vector{CL_device_id}(undef, ndevices[]) - @check api.clGetDeviceIDs(p.id, dtype, ndevices[], result, C_NULL) + result = Vector{cl_device_id}(undef, ndevices[]) + clGetDeviceIDs(p.id, dtype, ndevices[], result, C_NULL) return Device[Device(id) for id in result] catch err if err.desc == :CL_DEVICE_NOT_FOUND || err.code == -1 @@ -85,7 +85,7 @@ function devices(p::Platform, dtype::Symbol) devices(p, cl_device_type(dtype)) end -function devices(dtype::CL_device_type) +function devices(dtype) devs = Device[] for platform in platforms() append!(devs, devices(platform, dtype)) diff --git a/src/program.jl b/src/program.jl index 5763f3be..e805609e 100644 --- a/src/program.jl +++ b/src/program.jl @@ -3,13 +3,13 @@ using Printf mutable struct Program <: CLObject - id::CL_program + id::cl_program binary::Bool - 
function Program(program_id::CL_program; + function Program(program_id::cl_program; retain::Bool=false, binary::Bool=false) if retain - @check api.clRetainProgram(program_id) + clRetainProgram(program_id) end p = new(program_id, binary) finalizer(_finalize, p) @@ -19,7 +19,7 @@ end function _finalize(p::Program) if p.id != C_NULL - @check api.clReleaseProgram(p.id) + clReleaseProgram(p.id) p.id = C_NULL end end @@ -35,14 +35,14 @@ Base.pointer(p::Program) = p.id Base.getindex(p::Program, pinfo::Symbol) = info(p, pinfo) function Program(ctx::Context; source=nothing, binaries=nothing) - local program_id::CL_program + local program_id::cl_program if source !== nothing && binaries !== nothing throw(ArgumentError("Program be source or binary")) end if source !== nothing byte_source = [String(source)] - err_code = Ref{CL_int}() - program_id = api.clCreateProgramWithSource(ctx.id, 1, byte_source, C_NULL, err_code) + err_code = Ref{Cint}() + program_id = clCreateProgramWithSource(ctx.id, 1, byte_source, C_NULL, err_code) if err_code[] != CL_SUCCESS throw(CLError(err_code[])) end @@ -50,9 +50,9 @@ function Program(ctx::Context; source=nothing, binaries=nothing) elseif binaries !== nothing ndevices = length(binaries) - device_ids = Vector{CL_device_id}(undef, ndevices) + device_ids = Vector{cl_device_id}(undef, ndevices) bin_lengths = Vector{Csize_t}(undef, ndevices) - binary_status = Vector{CL_int}(undef, ndevices) + binary_status = Vector{Cint}(undef, ndevices) binary_ptrs= Vector{Ptr{UInt8}}(undef, ndevices) try for (i, (dev, bin)) in enumerate(binaries) @@ -60,8 +60,8 @@ function Program(ctx::Context; source=nothing, binaries=nothing) bin_lengths[i] = length(bin) binary_ptrs[i] = Base.unsafe_convert(Ptr{UInt8}, pointer(bin)) end - err_code = Ref{CL_int}() - program_id = api.clCreateProgramWithBinary(ctx.id, ndevices, device_ids, bin_lengths, + err_code = Ref{Cint}() + program_id = clCreateProgramWithBinary(ctx.id, ndevices, device_ids, bin_lengths, binary_ptrs, 
binary_status, err_code) if err_code[] != CL_SUCCESS throw(CLError(err_code[])) @@ -89,10 +89,7 @@ function build!(p::Program; options = "", raise = true) opts = String(options) ndevices = 0 device_ids = C_NULL - err = api.clBuildProgram(p.id, cl_uint(ndevices), device_ids, opts, C_NULL, C_NULL) - if err != CL_BUILD_PROGRAM_FAILURE - @check err - end + err = unchecked_clBuildProgram(p.id, cl_uint(ndevices), device_ids, opts, C_NULL, C_NULL) for (dev, status) in cl.info(p, :build_status) if status == cl.CL_BUILD_ERROR println(stderr, "Couldn't compile kernel: ") @@ -100,33 +97,36 @@ function build!(p::Program; options = "", raise = true) print_with_linenumbers(source, " ", stderr) println(stderr, "With following build error:") println(stderr, cl.info(p, :build_log)[dev]) - raise && @check err # throw the build error when raise! + raise && err # throw the build error when raise! end end + if err != CL_SUCCESS + throw(CLError(err)) + end return p end function info(p::Program, pinfo::Symbol) num_devices(p::Program) = begin - ret = Ref{CL_uint}() - @check api.clGetProgramInfo(p.id, CL_PROGRAM_NUM_DEVICES, sizeof(ret), ret, C_NULL) + ret = Ref{Cuint}() + clGetProgramInfo(p.id, CL_PROGRAM_NUM_DEVICES, sizeof(ret), ret, C_NULL) return ret[] end devices(p::Program) = begin ndevices = num_devices(p) - device_ids = Vector{CL_device_id}(undef, ndevices) - @check api.clGetProgramInfo(p.id, CL_PROGRAM_DEVICES, - sizeof(CL_device_id) * ndevices, device_ids, C_NULL) + device_ids = Vector{cl_device_id}(undef, ndevices) + clGetProgramInfo(p.id, CL_PROGRAM_DEVICES, + sizeof(cl_device_id) * ndevices, device_ids, C_NULL) return [Device(device_ids[i]) for i in 1:ndevices] end build_status(p::Program) = begin - status_dict = Dict{Device, CL_build_status}() - status = Ref{CL_build_status}() + status_dict = Dict{Device, cl_build_status}() + status = Ref{cl_build_status}() for d in devices(p) - @check api.clGetProgramBuildInfo(p.id, d.id, CL_PROGRAM_BUILD_STATUS, - sizeof(CL_build_status), 
status, C_NULL) + clGetProgramBuildInfo(p.id, d.id, CL_PROGRAM_BUILD_STATUS, + sizeof(cl_build_status), status, C_NULL) status_dict[d] = status[] end return status_dict @@ -136,15 +136,15 @@ function info(p::Program, pinfo::Symbol) logs = Dict{Device, String}() for d in devices(p) log_len = Ref{Csize_t}() - @check api.clGetProgramBuildInfo(p.id, d.id, CL_PROGRAM_BUILD_LOG, - 0, C_NULL, log_len) + clGetProgramBuildInfo(p.id, d.id, CL_PROGRAM_BUILD_LOG, + 0, C_NULL, log_len) if log_len[] == 0 logs[d] = "" continue end - log_bytestring = Vector{CL_char}(undef, log_len[]) - @check api.clGetProgramBuildInfo(p.id, d.id, CL_PROGRAM_BUILD_LOG, - log_len[], log_bytestring, C_NULL) + log_bytestring = Vector{Cchar}(undef, log_len[]) + clGetProgramBuildInfo(p.id, d.id, CL_PROGRAM_BUILD_LOG, + log_len[], log_bytestring, C_NULL) logs[d] = CLString(log_bytestring) end return logs @@ -153,11 +153,11 @@ function info(p::Program, pinfo::Symbol) binaries(p::Program) = begin binary_dict = Dict{Device, Array{UInt8}}() slen = Ref{Csize_t}() - @check api.clGetProgramInfo(p.id, CL_PROGRAM_BINARY_SIZES, + clGetProgramInfo(p.id, CL_PROGRAM_BINARY_SIZES, 0, C_NULL, slen) sizes = zeros(Csize_t, slen[]) - @check api.clGetProgramInfo(p.id, CL_PROGRAM_BINARY_SIZES, + clGetProgramInfo(p.id, CL_PROGRAM_BINARY_SIZES, slen[], sizes, C_NULL) bins = Vector{Ptr{UInt8}}(undef, length(sizes)) # keep a reference to the underlying binary arrays @@ -173,7 +173,7 @@ function info(p::Program, pinfo::Symbol) bins[i] = Base.unsafe_convert(Ptr{UInt8}, C_NULL) end end - @check api.clGetProgramInfo(p.id, CL_PROGRAM_BINARIES, + clGetProgramInfo(p.id, CL_PROGRAM_BINARIES, length(sizes) * sizeof(Ptr{UInt8}), bins, C_NULL) bidx = 1 @@ -189,24 +189,24 @@ function info(p::Program, pinfo::Symbol) source(p::Program) = begin p.binary && throw(CLError(-45)) src_len = Ref{Csize_t}() - @check api.clGetProgramInfo(p.id, CL_PROGRAM_SOURCE, 0, C_NULL, src_len) + clGetProgramInfo(p.id, CL_PROGRAM_SOURCE, 0, C_NULL, src_len) 
src_len[] <= 1 && return nothing src = Vector{Cchar}(undef, src_len[]) - @check api.clGetProgramInfo(p.id, CL_PROGRAM_SOURCE, src_len[], src, C_NULL) + clGetProgramInfo(p.id, CL_PROGRAM_SOURCE, src_len[], src, C_NULL) return CLString(src) end context(p::Program) = begin - ret = Ref{CL_context}() - @check api.clGetProgramInfo(p.id, CL_PROGRAM_CONTEXT, - sizeof(CL_context), ret, C_NULL) + ret = Ref{cl_context}() + clGetProgramInfo(p.id, CL_PROGRAM_CONTEXT, + sizeof(cl_context), ret, C_NULL) return Context(ret[], retain = true) end reference_count(p::Program) = begin - ret = Ref{CL_uint}() - @check api.clGetProgramInfo(p.id, CL_PROGRAM_REFERENCE_COUNT, - sizeof(CL_uint), ret, C_NULL) + ret = Ref{Cuint}() + clGetProgramInfo(p.id, CL_PROGRAM_REFERENCE_COUNT, + sizeof(Cuint), ret, C_NULL) return ret[] end diff --git a/src/queue.jl b/src/queue.jl index 59dde1a7..ba3c19ad 100644 --- a/src/queue.jl +++ b/src/queue.jl @@ -1,17 +1,17 @@ # OpenCL.CmdQueue mutable struct CmdQueue <: CLObject - id::CL_command_queue + id::cl_command_queue - function CmdQueue(q_id::CL_command_queue; retain=false) + function CmdQueue(q_id::cl_command_queue; retain=false) if retain - @check api.clRetainCommandQueue(q_id) + clRetainCommandQueue(q_id) end q = new(q_id) finalizer(q) do x retain || _deletecached!(q) if x.id != C_NULL - @check api.clReleaseCommandQueue(x.id) + clReleaseCommandQueue(x.id) x.id = C_NULL end end @@ -79,12 +79,12 @@ function CmdQueue(ctx::Context, dev::Device, props::NTuple{2,Symbol}) return CmdQueue(ctx, dev, flags) end -function CmdQueue(ctx::Context, dev::Device, props::CL_command_queue_properties) - err_code = Ref{CL_int}() - queue_id = api.clCreateCommandQueue(ctx.id, dev.id, props, err_code) +function CmdQueue(ctx::Context, dev::Device, props) + err_code = Ref{Cint}() + queue_id = clCreateCommandQueue(ctx.id, dev.id, props, err_code) if err_code[] != CL_SUCCESS if queue_id != C_NULL - api.clReleaseCommandQueue(queue_id) + clReleaseCommandQueue(queue_id) end 
throw(CLError(err_code[])) end @@ -92,41 +92,41 @@ function CmdQueue(ctx::Context, dev::Device, props::CL_command_queue_properties) end function flush(q::CmdQueue) - @check api.clFlush(q.id) + clFlush(q.id) return q end function finish(q::CmdQueue) - @check api.clFinish(q.id) + clFinish(q.id) return q end function info(q::CmdQueue, qinfo::Symbol) context(q::CmdQueue) = begin - ctx_id = Ref{CL_context}() - @check api.clGetCommandQueueInfo(q.id, CL_QUEUE_CONTEXT, - sizeof(CL_context), ctx_id, C_NULL) + ctx_id = Ref{cl_context}() + clGetCommandQueueInfo(q.id, CL_QUEUE_CONTEXT, + sizeof(cl_context), ctx_id, C_NULL) Context(ctx_id[], retain=true) end device(q::CmdQueue) = begin - dev_id = Ref{CL_device_id}() - @check api.clGetCommandQueueInfo(q.id, CL_QUEUE_DEVICE, - sizeof(CL_device_id), dev_id, C_NULL) + dev_id = Ref{cl_device_id}() + clGetCommandQueueInfo(q.id, CL_QUEUE_DEVICE, + sizeof(cl_device_id), dev_id, C_NULL) Device(dev_id[]) end reference_count(q::CmdQueue) = begin - ref_count = Ref{CL_uint}() - @check api.clGetCommandQueueInfo(q.id, CL_QUEUE_REFERENCE_COUNT, - sizeof(CL_uint), ref_count, C_NULL) + ref_count = Ref{Cuint}() + clGetCommandQueueInfo(q.id, CL_QUEUE_REFERENCE_COUNT, + sizeof(Cuint), ref_count, C_NULL) ref_count[] end properties(q::CmdQueue) = begin - props = Ref{CL_command_queue_properties}() - @check api.clGetCommandQueueInfo(q.id, CL_QUEUE_PROPERTIES, - sizeof(CL_command_queue_properties), + props = Ref{cl_command_queue_properties}() + clGetCommandQueueInfo(q.id, CL_QUEUE_PROPERTIES, + sizeof(cl_command_queue_properties), props, C_NULL) props[] end diff --git a/src/types.jl b/src/types.jl deleted file mode 100644 index bffdb04c..00000000 --- a/src/types.jl +++ /dev/null @@ -1,171 +0,0 @@ -#=== TypeAliases ===# - -# Opaque types -const CL_platform_id = Ptr{Nothing} -const CL_device_id = Ptr{Nothing} -const CL_context = Ptr{Nothing} -const CL_command_queue = Ptr{Nothing} -const CL_mem = Ptr{Nothing} -const CL_program = Ptr{Nothing} -const 
CL_kernel = Ptr{Nothing} -const CL_event = Ptr{Nothing} -const CL_sampler = Ptr{Nothing} - -# Scalar types -const CL_char = Int8 -const CL_uchar = UInt8 -const CL_short = Int16 -const CL_ushort = UInt16 -const CL_int = Int32 -const CL_uint = UInt32 -const CL_long = Int64 -const CL_ulong = UInt64 - -const CL_half = Float16 -const CL_float = Float32 -const CL_double = Float64 - -const CL_bool = CL_uint -const CL_bitfield = CL_ulong -const CL_device_type = CL_bitfield -const CL_platform_info = CL_uint -const CL_device_info = CL_uint -const CL_device_fp_config = CL_bitfield -const CL_device_mem_cache_type = CL_uint -const CL_device_local_mem_type = CL_uint -const CL_device_exec_capabilities = CL_bitfield -const CL_device_svm_capabilities = CL_bitfield -const CL_command_queue_properties = CL_bitfield -const CL_device_partition_property = Cssize_t #intptr_t -const CL_device_affinity_domain = CL_bitfield - -const CL_context_properties = Cssize_t #intptr_t -const CL_context_info = CL_uint -const CL_queue_properties = CL_bitfield -const CL_command_queue_info = CL_uint -const CL_channel_order = CL_uint -const CL_channel_type = CL_uint -const CL_mem_flags = CL_bitfield -const CL_svm_mem_flags = CL_bitfield -const CL_mem_object_type = CL_uint -const CL_mem_info = CL_uint -const CL_mem_migration_flags = CL_bitfield -const CL_image_info = CL_uint -const CL_buffer_create_type = CL_uint -const CL_addressing_mode = CL_uint -const CL_filter_mode = CL_uint -const CL_sampler_info = CL_uint -const CL_map_flags = CL_bitfield -const CL_pipe_properties = Cssize_t #intptr_t -const CL_pipe_info = CL_uint -const CL_program_info = CL_uint -const CL_program_build_info = CL_uint -const CL_build_status = CL_int -const CL_kernel_info = CL_uint -const CL_kernel_arg_info = CL_uint -const CL_kernel_work_group_info = CL_uint -const CL_event_info = CL_uint -const CL_command_type = CL_uint -const CL_profiling_info = CL_uint -const CL_sampler_properties = CL_bitfield -const CL_kernel_exec_info = CL_uint 
- -# Scalar OpenGL types ! We should get these from OpenGL.jl - -const GL_uint = UInt32 -const GL_int = Int32 - -const GL_enum = GL_uint - -# interop types - -const CL_GL_object_type = CL_uint -const CL_GL_texture_info = CL_uint -const CL_GL_platform_info = CL_uint -const CL_gl_context_info = CL_uint - - -const GL_sync = Ptr{Nothing} - -#=== Image Types ===# - -struct CL_image_format - image_channel_order::CL_channel_order - image_channel_data_type::CL_channel_type -end - -struct CL_image_desc - image_type::CL_mem_object_type - image_width::Csize_t - image_depth::Csize_t - image_array_size::Csize_t - image_row_pitch::Csize_t - image_slice_pitch::Csize_t - num_mip_levels::CL_uint - num_samples::CL_uint - buffer::CL_mem -end - -struct CL_buffer_region - origin::Csize_t - size::Csize_t -end - - -#=== Conversion Functions ===# - -cl_char(x) = Int8(x) -cl_uchar(x) = UInt8(x) -cl_short(x) = Int16(x) -cl_ushort(x) = UInt16(x) -cl_int(x) = Int32(x) -cl_uint(x) = UInt32(x) -cl_long(x) = Int64(x) -cl_ulong(x) = UInt64(x) - -cl_half(x) = UInt16(x) -cl_float(x) = Float32(x) -cl_double(x) = Float64(x) - -cl_bool(x) = x != 0 ? 
cl_uint(1) : cl_uint(0) -cl_bitfield(x) = cl_ulong(x) - -cl_command_queue_properties(x) = cl_ulong(x) -cl_device_type(x) = cl_bitfield(x) -cl_platform_info(x) = cl_uint(x) -cl_device_info(x) = cl_uint(x) -cl_device_fp_config(x) = cl_bitfield(x) -cl_device_mem_cache_type(x) = cl_uint(x) -cl_device_local_mem_type(x) = cl_uint(x) -cl_device_exec_capabilities(x) = cl_bitfield(x) -cl_device_svm_capabilities(x) = cl_bitfield(x) - -cl_context_properties(x) = convert(CL_context_properties, x) -cl_context_info(x) = cl_uint(x) -cl_queue_properties(x) = cl_bitfield(x) -cl_command_queue_info(x) = cl_uint(x) -cl_channel_order(x) = cl_uint(x) -cl_channel_type(x) = cl_uint(x) -cl_mem_flags(x) = cl_bitfield(x) -cl_svm_mem_flags(x) = cl_bitfield(x) -cl_mem_object_type(x) = cl_uint(x) -cl_mem_info(x) = cl_uint(x) -cl_image_info(x) = cl_uint(x) -cl_buffer_create_type(x) = cl_uint(x) -cl_addressing_mode(x) = cl_uint(x) -cl_filter_mode(x) = cl_uint(x) -cl_sampler_info(x) = cl_uint(x) -cl_map_flags(x) = cl_bitfield(x) -cl_pipe_properties(x) = convert(CL_pipe_properties, x) -cl_pipe_info(x) = cl_uint(x) -cl_program_info(x) = cl_uint(x) -cl_program_build_info(x) = cl_uint(x) -cl_build_status(x) = cl_int(x) -cl_kernel_info(x) = cl_uint(x) -cl_kernel_work_group_info(x) = cl_uint(x) -cl_event_info(x) = cl_uint(x) -cl_command_type(x) = cl_uint(x) -cl_profiling_info(x) = cl_uint(x) -cl_sampler_properties(x) = cl_bitfield(x) -cl_kernel_exec(x) = cl_uint(x) -cl_platform_id(x) = Ptr{Nothing}(x) diff --git a/src/util.jl b/src/util.jl index c9a31e8c..32453f72 100644 --- a/src/util.jl +++ b/src/util.jl @@ -5,15 +5,15 @@ function create_compute_context() return (device, ctx, queue) end -opencl_version(obj :: CLObject) = api.parse_version(obj[:version]) -opencl_version(c :: Context) = opencl_version(first(devices(c))) -opencl_version(q :: CmdQueue) = opencl_version(q[:device]) +opencl_version(obj::CLObject) = parse_version(obj[:version]) +opencl_version(c::Context) = opencl_version(first(devices(c))) 
+opencl_version(q::CmdQueue) = opencl_version(q[:device]) -const _versionDict = Dict{Ptr{Nothing}, VersionNumber}() +const _versionDict = Dict{Ptr, VersionNumber}() -_deletecached!(obj :: CLObject) = delete!(_versionDict, pointer(obj)) +_deletecached!(obj::CLObject) = delete!(_versionDict, pointer(obj)) -function check_version(obj :: CLObject, version :: VersionNumber) +function check_version(obj::CLObject, version::VersionNumber) version <= get!(_versionDict, pointer(obj)) do opencl_version(obj) end diff --git a/test/Project.toml b/test/Project.toml new file mode 100644 index 00000000..382d28bc --- /dev/null +++ b/test/Project.toml @@ -0,0 +1,4 @@ +[deps] +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +pocl_jll = "627d6b7a-bbe6-5189-83e7-98cc0a5aeadd" diff --git a/test/array.jl b/test/array.jl new file mode 100644 index 00000000..e3b9bf30 --- /dev/null +++ b/test/array.jl @@ -0,0 +1,52 @@ +using .cl: CLArray + +using LinearAlgebra + +@testset "CLArray" begin + @testset "constructors" begin + ctx = cl.Context(device) + queue = cl.CmdQueue(ctx) + hostarray = zeros(Float32, 128*64) + A = CLArray(queue, hostarray) + + @test CLArray(queue, (:rw, :copy), hostarray) != nothing + + @test CLArray(queue, hostarray, flags=(:rw, :copy)) != nothing + + @test CLArray(queue, hostarray) != nothing + + @test CLArray(cl.Buffer(Float32, ctx, length(hostarray), (:r, :copy), hostbuf=hostarray), + queue, + (128, 64)) != nothing + + @test copy(A) == A + end + + @testset "fill" begin + ctx = cl.Context(device) + queue = cl.CmdQueue(ctx) + + @test cl.to_host(cl.fill(Float32, queue, Float32(0.5), + 32, 64)) == fill(Float32(0.5), 32, 64) + @test cl.to_host(cl.zeros(Float32, queue, 64)) == zeros(Float32, 64) + @test cl.to_host(cl.ones(Float32, queue, 64)) == ones(Float32, 64) + end + + @testset "core functions" begin + ctx = cl.Context(device) + queue = cl.CmdQueue(ctx) + A = CLArray(queue, rand(Float32, 128, 64)) + @test size(A) == 
(128, 64) + @test ndims(A) == 2 + @test length(A) == 128*64 + # reshape + B = reshape(A, 128*64) + @test reshape(B, 128, 64) == A + # transpose + X = CLArray(queue, rand(Float32, 32, 32)) + B = cl.zeros(Float32, queue, 64, 128) + ev = transpose!(B, A) + cl.wait(ev) + #@test cl.to_host(copy(A')) == cl.to_host(B) + end +end diff --git a/test/behaviour.jl b/test/behaviour.jl new file mode 100644 index 00000000..22bfa5e5 --- /dev/null +++ b/test/behaviour.jl @@ -0,0 +1,277 @@ +#= +info( +"====================================================================== + Running Behavior Tests + ======================================================================") +=# +@testset "Hello World Test" begin + hello_world_kernel = " + #pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable + + __constant char hw[] = \"hello world\"; + + __kernel void hello(__global char *out) { + int tid = get_global_id(0); + out[tid] = hw[tid]; + }" + + hello_world_str = "hello world" + + + ctx = cl.Context(device) + queue = cl.CmdQueue(ctx) + + str_len = length(hello_world_str) + 1 + out_buf = cl.Buffer(Cchar, ctx, sizeof(Cchar) * str_len, :w) + + prg = cl.Program(ctx, source=hello_world_kernel) |> cl.build! 
+ kern = cl.Kernel(prg, "hello") + + queue(kern, str_len, nothing, out_buf) + h = cl.read(queue, out_buf) + + @test cl.CLString(h) == hello_world_str +end + +@testset "Low Level API Test" begin + + test_source = " + __kernel void sum(__global const float *a, + __global const float *b, + __global float *c, + const unsigned int count) + { + unsigned int gid = get_global_id(0); + if (gid < count) { + c[gid] = a[gid] + b[gid]; + } + } + " + + len = 1024 + h_a = Vector{Cfloat}(undef, len) + h_b = Vector{Cfloat}(undef, len) + h_c = Vector{Cfloat}(undef, len) + h_d = Vector{Cfloat}(undef, len) + h_e = Vector{Cfloat}(undef, len) + h_f = Vector{Cfloat}(undef, len) + h_g = Vector{Cfloat}(undef, len) + + for i in 1:len + h_a[i] = Cfloat(rand()) + h_b[i] = Cfloat(rand()) + h_e[i] = Cfloat(rand()) + h_g[i] = Cfloat(rand()) + end + + err_code = Ref{cl.Cint}() + + # create compute context (TODO: fails if function ptr's not passed...) + ctx_id = cl.clCreateContext(C_NULL, 1, [device.id], + C_NULL, + C_NULL, + err_code) + if err_code[] != cl.CL_SUCCESS + throw(cl.CLError(err_code[])) + end + + q_id = cl.clCreateCommandQueue(ctx_id, device.id, 0, err_code) + if err_code[] != cl.CL_SUCCESS + error("Failed to create command queue") + end + + # create program + bytesource = String(test_source) + prg_id = cl.clCreateProgramWithSource(ctx_id, 1, [bytesource], C_NULL, err_code) + if err_code[] != cl.CL_SUCCESS + error("Failed to create program") + end + + # build program + cl.clBuildProgram(prg_id, 0, C_NULL, C_NULL, C_NULL, C_NULL) + + # create compute kernel + k_id = cl.clCreateKernel(prg_id, "sum", err_code) + if err_code[] != cl.CL_SUCCESS + error("Failed to create compute kernel") + end + + # create input array in device memory + Aid = cl.clCreateBuffer(ctx_id, cl.CL_MEM_READ_ONLY | cl.CL_MEM_COPY_HOST_PTR, + sizeof(Cfloat) * len, h_a, err_code) + if err_code[] != cl.CL_SUCCESS + error("Error creating buffer A") + end + Bid = cl.clCreateBuffer(ctx_id, cl.CL_MEM_READ_ONLY | 
cl.CL_MEM_COPY_HOST_PTR, + sizeof(Cfloat) * len, h_b, err_code) + if err_code[] != cl.CL_SUCCESS + error("Error creating buffer B") + end + Eid = cl.clCreateBuffer(ctx_id, cl.CL_MEM_WRITE_ONLY | cl.CL_MEM_COPY_HOST_PTR, + sizeof(Cfloat) * len, h_e, err_code) + if err_code[] != cl.CL_SUCCESS + error("Error creating buffer E") + end + Gid = cl.clCreateBuffer(ctx_id, cl.CL_MEM_WRITE_ONLY | cl.CL_MEM_COPY_HOST_PTR, + sizeof(Cfloat) * len, h_g, err_code) + if err_code[] != cl.CL_SUCCESS + error("Error creating buffer G") + end + + # create output arrays in device memory + + Cid = cl.clCreateBuffer(ctx_id, cl.CL_MEM_READ_WRITE, + sizeof(Cfloat) * len, C_NULL, err_code) + if err_code[] != cl.CL_SUCCESS + error("Error creating buffer C") + end + Did = cl.clCreateBuffer(ctx_id, cl.CL_MEM_READ_WRITE, + sizeof(Cfloat) * len, C_NULL, err_code) + if err_code[] != cl.CL_SUCCESS + error("Error creating buffer D") + end + Fid = cl.clCreateBuffer(ctx_id, cl.CL_MEM_WRITE_ONLY, + sizeof(Cfloat) * len, C_NULL, err_code) + if err_code[] != cl.CL_SUCCESS + error("Error creating buffer F") + end + + cl.clSetKernelArg(k_id, 0, sizeof(cl.cl_mem), [Aid]) + cl.clSetKernelArg(k_id, 1, sizeof(cl.cl_mem), [Bid]) + cl.clSetKernelArg(k_id, 2, sizeof(cl.cl_mem), [Cid]) + cl.clSetKernelArg(k_id, 3, sizeof(cl.Cuint), cl.Cuint[len]) + + nglobal = Ref{Csize_t}(len) + cl.clEnqueueNDRangeKernel(q_id, k_id, 1, C_NULL, + nglobal, C_NULL, 0, C_NULL, C_NULL) + + cl.clSetKernelArg(k_id, 0, sizeof(cl.cl_mem), [Eid]) + cl.clSetKernelArg(k_id, 1, sizeof(cl.cl_mem), [Cid]) + cl.clSetKernelArg(k_id, 2, sizeof(cl.cl_mem), [Did]) + cl.clEnqueueNDRangeKernel(q_id, k_id, 1, C_NULL, + nglobal, C_NULL, 0, C_NULL, C_NULL) + + cl.clSetKernelArg(k_id, 0, sizeof(cl.cl_mem), [Gid]) + cl.clSetKernelArg(k_id, 1, sizeof(cl.cl_mem), [Did]) + cl.clSetKernelArg(k_id, 2, sizeof(cl.cl_mem), [Fid]) + cl.clEnqueueNDRangeKernel(q_id, k_id, 1, C_NULL, + nglobal, C_NULL, 0, C_NULL, C_NULL) + + # read back the result from compute 
device... + cl.clEnqueueReadBuffer(q_id, Fid, cl.CL_TRUE, 0, + sizeof(Cfloat) * len, h_f, 0, C_NULL, C_NULL) + + # test results + for i in 1:len + tmp = h_a[i] + h_b[i] + h_e[i] + h_g[i] + @test tmp ≈ h_f[i] + end +end + +struct Params + A::Float32 + B::Float32 + #TODO: fixed size arrays? + X1::Float32 + X2::Float32 + C::Int32 + Params(a, b, x, c) = begin + new(Float32(a), + Float32(b), + Float32(x[1]), + Float32(x[2]), + Int32(c)) + end +end + +let test_struct = " + typedef struct Params + { + float A; + float B; + float x[2]; //padding + int C; + } Params; + + + __kernel void part3(__global const float *a, + __global const float *b, + __global float *c, + __constant struct Params* test) + { + int gid = get_global_id(0); + c[gid] = test->A * a[gid] + test->B * b[gid] + test->C; + } +" + +@testset "Struct Buffer Test" begin + ctx = cl.Context(device) + q = cl.CmdQueue(ctx) + p = cl.Program(ctx, source=test_struct) |> cl.build! + + part3 = cl.Kernel(p, "part3") + + X = fill(1f0, 10) + Y = fill(1f0, 10) + + P = [Params(0.5, 10.0, [0.0, 0.0], 3)] + + #TODO: constructor for single immutable types.., check if passed parameter isbits + P_buf = cl.Buffer(Params, ctx, length(P), :r) + cl.write!(q, P_buf, P) + + X_buf = cl.Buffer(Float32, ctx, length(X), (:r, :copy), hostbuf=X) + Y_buf = cl.Buffer(Float32, ctx, length(Y), (:r, :copy), hostbuf=Y) + R_buf = cl.Buffer(Float32, ctx, length(X), :w) + + global_size = size(X) + q(part3, global_size, nothing, X_buf, Y_buf, R_buf, P_buf) + + r = cl.read(q, R_buf) + @test all(x -> x == 13.5, r) +end + +end + +mutable struct MutableParams + A::Float32 + B::Float32 +end + + +let test_mutable_pointerfree = " + typedef struct Params + { + float A; + float B; + } Params; + + + __kernel void part3( + __global float *a, + Params test + ){ + a[0] = test.A; + a[1] = test.B; + } +" + + +@testset "Struct Buffer Test" begin + ctx = cl.Context(device) + q = cl.CmdQueue(ctx) + p = cl.Program(ctx, source=test_mutable_pointerfree) |> cl.build! 
+ + part3 = cl.Kernel(p, "part3") + + P = MutableParams(0.5, 10.0) + P_buf = cl.Buffer(Float32, ctx, 2, :w) + q(part3, 1, nothing, P_buf, P) + + r = cl.read(q, P_buf) + + @test r[1] == 0.5 + @test r[2] == 10.0 +end + +end diff --git a/test/buffer.jl b/test/buffer.jl new file mode 100644 index 00000000..d28867e2 --- /dev/null +++ b/test/buffer.jl @@ -0,0 +1,213 @@ +using Base.GC + +struct TestStruct + a::Cint + b::Cfloat +end + +@testset "Buffer" begin + @testset "constructors" begin + ctx = cl.Context(device) + testarray = zeros(Float32, 1000) + + @test cl.Buffer(Float32, ctx, length(testarray), + cl.CL_MEM_ALLOC_HOST_PTR | cl.CL_MEM_READ_ONLY) != nothing + + @test cl.Buffer(Float32, ctx, length(testarray), + cl.CL_MEM_ALLOC_HOST_PTR | cl.CL_MEM_WRITE_ONLY) != nothing + + @test cl.Buffer(Float32, ctx, length(testarray), + cl.CL_MEM_ALLOC_HOST_PTR | cl.CL_MEM_READ_WRITE) != nothing + + buf = cl.Buffer(Float32, ctx, length(testarray), + cl.CL_MEM_ALLOC_HOST_PTR | cl.CL_MEM_READ_WRITE) + @test length(buf) == length(testarray) + + @test cl.Buffer(Float32, ctx, length(testarray), + cl.CL_MEM_COPY_HOST_PTR | cl.CL_MEM_READ_ONLY; + hostbuf=testarray) != nothing + + @test cl.Buffer(Float32, ctx, length(testarray), + cl.CL_MEM_COPY_HOST_PTR | cl.CL_MEM_WRITE_ONLY; + hostbuf=testarray) != nothing + + @test cl.Buffer(Float32, ctx, length(testarray), + cl.CL_MEM_COPY_HOST_PTR | cl.CL_MEM_READ_WRITE; + hostbuf=testarray) != nothing + + buf = cl.Buffer(Float32, ctx, length(testarray), + cl.CL_MEM_COPY_HOST_PTR | cl.CL_MEM_READ_WRITE; + hostbuf=testarray) + @test length(buf) == length(testarray) + + @test cl.Buffer(Float32, ctx, length(testarray), + cl.CL_MEM_USE_HOST_PTR | cl.CL_MEM_READ_ONLY; + hostbuf=testarray) != nothing + + @test cl.Buffer(Float32, ctx, length(testarray), + cl.CL_MEM_USE_HOST_PTR | cl.CL_MEM_WRITE_ONLY; + hostbuf=testarray) != nothing + + @test cl.Buffer(Float32, ctx, length(testarray), + cl.CL_MEM_USE_HOST_PTR | cl.CL_MEM_READ_WRITE; + hostbuf=testarray) 
!= nothing + + buf = cl.Buffer(Float32, ctx, length(testarray), + cl.CL_MEM_USE_HOST_PTR | cl.CL_MEM_READ_WRITE; + hostbuf=testarray) + @test length(buf) == length(testarray) + + # invalid buffer size should throw error + @test_throws cl.CLError cl.Buffer(Float32, ctx, +0, cl.CL_MEM_ALLOC_HOST_PTR) + @test_throws InexactError cl.Buffer(Float32, ctx, -1, cl.CL_MEM_ALLOC_HOST_PTR) + + # invalid flag combinations should throw error + @test_throws cl.CLError cl.Buffer(Float32, ctx, length(testarray), + cl.CL_MEM_USE_HOST_PTR | cl.CL_MEM_ALLOC_HOST_PTR; + hostbuf=testarray) + + # invalid host pointer should throw error + @test_throws TypeError cl.Buffer(Float32, ctx, 1, cl.CL_MEM_COPY_HOST_PTR; + hostbuf=C_NULL) + + @test_throws TypeError cl.Buffer(Float32, ctx, 1, cl.CL_MEM_USE_HOST_PTR, + hostbuf=C_NULL) + end + + @testset "constructors symbols" begin + ctx = cl.Context(device) + + for mf1 in [:rw, :r, :w] + for mf2 in [:copy, :use, :alloc, :null] + for mtype in [cl.Cchar, + cl.Cuchar, + cl.Cshort, + cl.Cushort, + Cint, + cl.Cuint, + cl.Clong, + cl.Culong, + Float16, + Cfloat, + Cdouble, + #TODO: bool, vector_types, struct_types... 
+ ] + testarray = zeros(mtype, 100) + if mf2 == :copy || mf2 == :use + @test cl.Buffer(mtype, ctx, length(testarray), (mf1, mf2); + hostbuf=testarray) != nothing + buf = cl.Buffer(mtype, ctx, length(testarray), (mf1, mf2); + hostbuf=testarray) + @test length(buf) == length(testarray) + elseif mf2 == :alloc + @test cl.Buffer(mtype, ctx, length(testarray), + (mf1, mf2)) != nothing + buf = cl.Buffer(mtype, ctx, length(testarray), (mf1, mf2)) + @test length(buf) == length(testarray) + end + end + end + end + + test_array = Vector{TestStruct}(undef, 100) + @test cl.Buffer(TestStruct, ctx, length(test_array), :alloc) != nothing + @test cl.Buffer(TestStruct, ctx, length(test_array), :copy; + hostbuf=test_array) != nothing + + # invalid buffer size should throw error + @test_throws cl.CLError cl.Buffer(Float32, ctx, +0, :alloc) + @test_throws InexactError cl.Buffer(Float32, ctx, -1, :alloc) + + # invalid flag combinations should throw error + @test_throws ArgumentError cl.Buffer(Float32, ctx, length(test_array), + (:use, :alloc), hostbuf=test_array) + + # invalid host pointer should throw error + @test_throws TypeError cl.Buffer(Float32, ctx, 1, :copy, hostbuf=C_NULL) + + @test_throws TypeError cl.Buffer(Float32, ctx, 1, :use, hostbuf=C_NULL) + end + + @testset "fill" begin + ctx = cl.Context(device) + queue = cl.CmdQueue(ctx) + testarray = zeros(Float32, 1000) + buf = cl.Buffer(Float32, ctx, length(testarray), (:rw, :copy), hostbuf=testarray) + @test length(buf) == length(testarray) + + cl.fill!(queue, buf, 1f0) + readback = cl.read(queue, buf) + @test all(x -> x == 1.0, readback) + @test all(x -> x == 0.0, testarray) + @test buf.valid == true + end + + @testset "write!" 
begin + ctx = cl.Context(device) + queue = cl.CmdQueue(ctx) + testarray = zeros(Float32, 1000) + buf = cl.Buffer(Float32, ctx, length(testarray), (:rw, :copy); hostbuf=testarray) + @test length(buf) == length(testarray) + cl.write!(queue, buf, ones(Float32, length(testarray))) + readback = cl.read(queue, buf) + @test all(x -> x == 1.0, readback) == true + @test buf.valid == true + end + + @testset "empty_like" begin + ctx = cl.Context(device) + queue = cl.CmdQueue(ctx) + testarray = zeros(Float32, 1000) + buf = cl.Buffer(Float32, ctx, length(testarray), (:rw, :copy); hostbuf=testarray) + + @test sizeof(cl.empty_like(ctx, buf)) == sizeof(testarray) + end + + @testset "copy!" begin + ctx = cl.Context(device) + queue = cl.CmdQueue(ctx) + test_array = fill(2f0, 1000) + a_buf = cl.Buffer(Float32, ctx, length(test_array)) + b_buf = cl.Buffer(Float32, ctx, length(test_array)) + c_arr = Vector{Float32}(undef, length(test_array)) + # host to device buffer + cl.copy!(queue, a_buf, test_array) + # device buffer to device buffer + cl.copy!(queue, b_buf, a_buf) + # device buffer to host + cl.copy!(queue, c_arr, b_buf) + @test all(x -> isapprox(x, 2.0), c_arr) == true + end + + @testset "map/unmap" begin + ctx = cl.Context(device) + queue = cl.CmdQueue(ctx) + b = cl.Buffer(Float32, ctx, 100, :rw) + for f in (:r, :w, :rw) + a, evt = cl.enqueue_map_mem(queue, b, f, 0, (10,10)) + cl.wait(evt) + @test size(a) == (10,10) + @test typeof(a) == Array{Float32,2} + + # cannot unmap a buffer without same host array + bad = similar(a) + @test_throws ArgumentError cl.unmap!(queue, b, bad) + + @test cl.ismapped(b) == true + cl.unmap!(queue, b, a) + @test cl.ismapped(b) == false + + # cannot unmap an unmapped buffer + @test_throws ArgumentError cl.unmap!(queue, b, a) + + # gc here quickly force any memory errors + Base.GC.gc() + end + @test cl.ismapped(b) == false + a, evt = cl.enqueue_map_mem(queue, b, :rw, 0, (10,10)) + @test cl.ismapped(b) == true + evt = cl.enqueue_unmap_mem(queue, b, a, 
wait_for=evt) + cl.wait(evt) + @test cl.ismapped(b) == false + end +end diff --git a/test/cmdqueue.jl b/test/cmdqueue.jl new file mode 100644 index 00000000..de4b5a40 --- /dev/null +++ b/test/cmdqueue.jl @@ -0,0 +1,36 @@ +@testset "CmdQueue" begin + @testset "constructor" begin + @test_throws MethodError cl.CmdQueue(nothing, nothing) + ctx = cl.Context(device) + @test cl.CmdQueue(ctx) != nothing + @test cl.CmdQueue(ctx, device) != nothing + @test cl.CmdQueue(ctx, :profile) != nothing + try + cl.CmdQueue(ctx, device, :out_of_order) + cl.CmdQueue(ctx, device, (:profile, :out_of_order)) + catch err + @warn("Platform $(device[:platform][:name]) does not seem to " * + "suport out of order queues: \n$err",maxlog=1, + exception=(err, catch_backtrace())) + end + @test_throws ArgumentError cl.CmdQueue(ctx, device, :unrecognized_flag) + for flag in [:profile, :out_of_order] + @test_throws ArgumentError cl.CmdQueue(ctx, (flag, :unrecognized_flag)) + @test_throws ArgumentError cl.CmdQueue(ctx, device, (:unrecognized_flag, flag)) + @test_throws ArgumentError cl.CmdQueue(ctx, (flag, flag)) + @test_throws ArgumentError cl.CmdQueue(ctx, device, (flag, flag)) + end + end + + @testset "info" begin + ctx = cl.Context(device) + q1 = cl.CmdQueue(ctx) + q2 = cl.CmdQueue(ctx, device) + for q in (q1, q2) + @test q[:context] == ctx + @test q[:device] == device + @test q[:reference_count] > 0 + @test typeof(q[:properties]) == cl.cl_command_queue_properties + end + end +end diff --git a/test/context.jl b/test/context.jl new file mode 100644 index 00000000..dca99a13 --- /dev/null +++ b/test/context.jl @@ -0,0 +1,110 @@ + +function context_test_callback(arg1, arg2, arg3) + # We're not really testing it because, nvidia doesn't seem to care about this functionality: + # https://devtalk.nvidia.com/default/topic/497433/context-callback-never-called/ + OpenCL.cl.log_error("Callback works") + return +end + +function create_context_error(ctx) + empty_kernel = " + __kernel void test() { + int c = 1 + 
1; + };" + try + p = cl.Program(ctx, source = empty_kernel) |> cl.build! + k = cl.Kernel(p, "test") + q = cl.CmdQueue(ctx) + q(k, 1, 10000000) + catch + end +end + + +@testset "Context" begin + @testset "constructor" begin + @test_throws MethodError (cl.Context([])) + ctx = cl.Context(device) + @test ctx != nothing + ctx_id = ctx.id + ctx2 = cl.Context(ctx_id) + @test cl.is_ctx_id_alive(ctx_id) + @test ctx.id != C_NULL + @test ctx2.id != C_NULL + finalize(ctx) + @test ctx.id == C_NULL + @test ctx2.id != C_NULL + @test cl.is_ctx_id_alive(ctx_id) + finalize(ctx2) + @test ctx.id == C_NULL + @test ctx2.id == C_NULL + # jeez, this segfaults... WHY? I suspect a driver bug for refcount == 0? + # NVIDIA 381.22 + #@test !cl.is_ctx_id_alive(ctx_id) + @testset "Context callback" begin + ctx = cl.Context(device, callback = context_test_callback) + create_context_error(ctx) + end + end + + + @testset "platform properties" begin + try + cl.Context(cl.CL_DEVICE_TYPE_CPU) + catch err + @test typeof(err) == cl.CLError + # CL_DEVICE_NOT_FOUND could be thrown for GPU only drivers + @test err.desc in (:CL_INVALID_PLATFORM, + :CL_DEVICE_NOT_FOUND) + end + + properties = [(cl.CL_CONTEXT_PLATFORM, platform)] + for (cl_dev_type, sym_dev_type) in [(cl.CL_DEVICE_TYPE_CPU, :cpu), + (cl.CL_DEVICE_TYPE_GPU, :gpu)] + if !cl.has_device_type(platform, sym_dev_type) + continue + end + @test cl.Context(sym_dev_type, properties=properties) != nothing + @test cl.Context(cl_dev_type, properties=properties) != nothing + ctx = cl.Context(cl_dev_type, properties=properties) + @test isempty(cl.properties(ctx)) == false + test_properties = cl.properties(ctx) + + @test test_properties == properties + + platform_in_properties = false + for (t, v) in test_properties + if t == cl.CL_CONTEXT_PLATFORM + @test v[:name] == platform[:name] + @test v == platform + platform_in_properties = true + break + end + end + @test platform_in_properties + end + try + ctx2 = cl.Context(cl.CL_DEVICE_TYPE_ACCELERATOR,
properties=properties) + catch err + @test typeof(err) == cl.CLError + @test err.desc == :CL_DEVICE_NOT_FOUND + end + end + + @testset "create_some_context" begin + @test cl.create_some_context() != nothing + @test typeof(cl.create_some_context()) == cl.Context + end + + @testset "parsing" begin + properties = [(cl.CL_CONTEXT_PLATFORM, platform)] + parsed_properties = cl._parse_properties(properties) + + @test isodd(length(parsed_properties)) + @test parsed_properties[end] == 0 + @test parsed_properties[1] == cl.cl_context_properties(cl.CL_CONTEXT_PLATFORM) + @test parsed_properties[2] == cl.cl_context_properties(platform.id) + end + +end diff --git a/test/device.jl b/test/device.jl new file mode 100644 index 00000000..6456d86e --- /dev/null +++ b/test/device.jl @@ -0,0 +1,87 @@ +@testset "Device" begin + @testset "Type" begin + for (t, k) in zip((cl.CL_DEVICE_TYPE_GPU, cl.CL_DEVICE_TYPE_CPU, + cl.CL_DEVICE_TYPE_ACCELERATOR, cl.CL_DEVICE_TYPE_ALL), + (:gpu, :cpu, :accelerator, :all)) + + #for (dk, dt) in zip(cl.devices(platform, k), cl.devices(platform, t)) + # @fact dk == dt --> true + #end + #devices = cl.devices(platform, k) + #for device in devices + # @fact device[:device_type] == t --> true + #end + end + end + + @testset "Equality" begin + devices = cl.devices(platform) + + if length(devices) > 1 + d1 = devices[1] + for d2 in devices[2:end] + @test pointer(d2) != pointer(d1) + @test hash(d2) != hash(d1) + @test isequal(d2, d1) == false + end + end + end + + @testset "Info" begin + device_info_keys = Symbol[ + :driver_version, + :version, + :extensions, + :platform, + :name, + :device_type, + :has_image_support, + :queue_properties, + :has_queue_out_of_order_exec, + :has_queue_profiling, + :has_native_kernel, + :vendor_id, + :max_compute_units, + :max_work_item_size, + :max_clock_frequency, + :address_bits, + :max_read_image_args, + :max_write_image_args, + :global_mem_size, + :max_mem_alloc_size, + :max_const_buffer_size, + :local_mem_size, + :has_local_mem, 
+ :host_unified_memory, + :available, + :compiler_available, + :max_work_group_size, + :max_parameter_size, + :profiling_timer_resolution, + :max_image2d_shape, + :max_image3d_shape, + ] + @test isa(platform, cl.Platform) + @test_throws ArgumentError platform[:zjdlkf] + + @test isa(device, cl.Device) + @test_throws ArgumentError device[:zjdlkf] + for k in device_info_keys + @test device[k] == cl.info(device, k) + if k == :extensions + @test isa(device[k], Array) + if length(device[k]) > 0 + @test isa(device[k], Array{String, 1}) + end + elseif k == :platform + @test device[k] == platform + elseif k == :max_work_item_size + @test length(device[k]) == 3 + elseif k == :max_image2d_shape + @test length(device[k]) == 2 + elseif k == :max_image3d_shape + @test length(device[k]) == 3 + end + end + end +end diff --git a/test/event.jl b/test/event.jl new file mode 100644 index 00000000..c81546e0 --- /dev/null +++ b/test/event.jl @@ -0,0 +1,75 @@ +if occursin("Portable", platform[:name]) || + occursin("Intel Gen OCL", platform[:name]) + msg = "$(platform[:name]) does not implement User Events or shows other problems" + @warn(msg) +else +@testset "Event" begin + @testset "status" begin + ctx = cl.Context(device) + evt = cl.UserEvent(ctx) + evt[:status] + @test evt[:status] == :submitted + cl.complete(evt) + @test evt[:status] == :complete + finalize(evt) + end + + @testset "wait" begin + ctx = cl.Context(device) + # create user event + usr_evt = cl.UserEvent(ctx) + q = cl.CmdQueue(ctx) + cl.enqueue_wait_for_events(q, usr_evt) + + # create marker event + mkr_evt = cl.enqueue_marker(q) + + @test usr_evt[:status] == :submitted + @test mkr_evt[:status] in (:queued, :submitted) + + cl.complete(usr_evt) + @test usr_evt[:status] == :complete + + cl.wait(mkr_evt) + @test mkr_evt[:status] == :complete + + @test cl.cl_event_status(:running) == cl.CL_RUNNING + @test cl.cl_event_status(:submitted) == cl.CL_SUBMITTED + @test cl.cl_event_status(:queued) == cl.CL_QUEUED + @test
cl.cl_event_status(:complete) == cl.CL_COMPLETE + end + + @testset "callback" begin + global callback_called = Ref(false) + + function test_callback(evt, status) + callback_called[] = true + end + + ctx = cl.Context(device) + usr_evt = cl.UserEvent(ctx) + queue = cl.CmdQueue(ctx) + + cl.enqueue_wait_for_events(queue, usr_evt) + + mkr_evt = cl.enqueue_marker(queue) + cl.add_callback(mkr_evt, test_callback) + + @test usr_evt[:status] == :submitted + @test mkr_evt[:status] in (:queued, :submitted) + @test !callback_called[] + + cl.complete(usr_evt) + @test usr_evt[:status] == :complete + + cl.wait(mkr_evt) + + # Give callback some time to finish + yield() + sleep(0.5) + + @test mkr_evt[:status] == :complete + @test callback_called[] + end +end +end diff --git a/test/kernel.jl b/test/kernel.jl new file mode 100644 index 00000000..23cace5d --- /dev/null +++ b/test/kernel.jl @@ -0,0 +1,210 @@ +struct CLTestStruct + f1::NTuple{3, Float32} + f2::Nothing + f3::Float32 +end + +@testset "Kernel" begin + test_source = " + __kernel void sum(__global const float *a, + __global const float *b, + __global float *c, + const unsigned int count) + { + unsigned int gid = get_global_id(0); + if (gid < count) { + c[gid] = a[gid] + b[gid]; + } + } + " + + #TODO: tests for invalid kernel build error && logs... 
+ + @testset "constructor" begin + ctx = cl.Context(device) + prg = cl.Program(ctx, source=test_source) + @test_throws ArgumentError cl.Kernel(prg, "sum") + cl.build!(prg) + @test cl.Kernel(prg, "sum") != nothing + end + + @testset "info" begin + ctx = cl.Context(device) + prg = cl.Program(ctx, source=test_source) + cl.build!(prg) + k = cl.Kernel(prg, "sum") + @test k[:name] == "sum" + @test k[:num_args] == 4 + @test k[:reference_count] > 0 + @test k[:program] == prg + @test typeof(k[:attributes]) == String + end + + @testset "mem/workgroup size" begin + ctx = cl.Context(device) + prg = cl.Program(ctx, source=test_source) + cl.build!(prg) + k = cl.Kernel(prg, "sum") + for (sf, clf) in [(:size, cl.CL_KERNEL_WORK_GROUP_SIZE), + (:compile_size, cl.CL_KERNEL_COMPILE_WORK_GROUP_SIZE), + (:local_mem_size, cl.CL_KERNEL_LOCAL_MEM_SIZE), + (:private_mem_size, cl.CL_KERNEL_PRIVATE_MEM_SIZE), + (:prefered_size_multiple, cl.CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE)] + @test cl.work_group_info(k, sf, device) != nothing + @test cl.work_group_info(k, clf, device) != nothing + if sf != :compile_size + @test cl.work_group_info(k, sf, device) == cl.work_group_info(k, clf, device) + end + end + end + + @testset "set_arg!/set_args!" begin + ctx = cl.Context(device) + queue = cl.CmdQueue(ctx) + + prg = cl.Program(ctx, source=test_source) |> cl.build! 
+ k = cl.Kernel(prg, "sum") + + count = 1024 + nbytes = count * sizeof(Float32) + + h_ones = ones(Float32, count) + + A = cl.Buffer(Float32, ctx, length(h_ones), (:r, :copy), hostbuf=h_ones) + B = cl.Buffer(Float32, ctx, length(h_ones), (:r, :copy), hostbuf=h_ones) + C = cl.Buffer(Float32, ctx, count, :w) + + # sizeof mem object for buffer in bytes + @test sizeof(A) == nbytes + @test sizeof(B) == nbytes + @test sizeof(C) == nbytes + + # we use julia's index by one convention + @test cl.set_arg!(k, 1, A) != nothing + @test cl.set_arg!(k, 2, B) != nothing + @test cl.set_arg!(k, 3, C) != nothing + @test cl.set_arg!(k, 4, UInt32(count)) != nothing + + cl.enqueue_kernel(queue, k, count) |> cl.wait + r = cl.read(queue, C) + + @test all(x -> x == 2.0, r) + cl.flush(queue) + + # test set_args with new kernel + k2 = cl.Kernel(prg, "sum") + cl.set_args!(k2, A, B, C, UInt32(count)) + + h_twos = fill(2f0, count) + cl.copy!(queue, A, h_twos) + cl.copy!(queue, B, h_twos) + + #TODO: check for ocl version, fill is opencl v1.2 + #cl.enqueue_fill(queue, A, 2f0) + #cl.enqueue_fill(queue, B, 2f0) + + cl.enqueue_kernel(queue, k2, count) + cl.finish(queue) + + r = cl.read(queue, C) + + @test all(x -> x == 4.0, r) + end + + @testset "enqueue_kernel" begin + simple_kernel = " + __kernel void test(__global float *i) { + *i += 1; + };" + + ctx = cl.Context(device) + + h_buff = Float32[1,] + d_buff = cl.Buffer(Float32, ctx, length(h_buff), (:rw, :copy), hostbuf=h_buff) + + p = cl.Program(ctx, source=simple_kernel) |> cl.build!
+ k = cl.Kernel(p, "test") + q = cl.CmdQueue(ctx) + + # dimensions must be the same size + @test_throws ArgumentError q(k, (1,), (1,1), d_buff) + @test_throws ArgumentError q(k, (1,1), (1,), d_buff) + + # dimensions are bounded + max_work_dim = device[:max_work_item_dims] + bad = tuple([1 for _ in 1:(max_work_dim + 1)]) + @test_throws MethodError q(k, bad, d_buff) + + # devices have finite work sizes + @test_throws MethodError q(k, (typemax(Int),), d_buff) + + # blocking call to kernel finishes cmd queue + q(k, 1, 1, d_buff) + + r = cl.read(q, d_buff) + @test r[1] == 2 + + # alternative kernel call syntax + k[q, (1,), (1,)](d_buff) + r = cl.read(q, d_buff) + @test r[1] == 3 + + # enqueue task is an alias for calling + # a kernel with a global/local size of 1 + evt = cl.enqueue_task(q, k) + r = cl.read(q, d_buff) + @test r[1] == 4 + end + + @testset "packed structures" begin + test_source = " + struct __attribute__((packed)) Test2{ + long f1; + int __attribute__((aligned (8))) f2; + }; + __kernel void structest(__global float *out, struct Test2 b){ + out[0] = b.f1; + out[1] = b.f2; + } + " + ctx = cl.Context(device) + prg = cl.Program(ctx, source = test_source) + queue = cl.CmdQueue(ctx) + cl.build!(prg) + structkernel = cl.Kernel(prg, "structest") + out = cl.Buffer(Float32, ctx, 2, :w) + bstruct = (1, Int32(4)) + structkernel[queue, (1,)](out, bstruct) + r = cl.read(queue, out) + @test r == [1f0, 4f0] + end + + @testset "empty types" begin + test_source = " + //packed + struct __attribute__((packed)) Test{ + float3 f1; + int f2; // empty type gets replaced with Int32 (no empty types allowed in OpenCL) + // you might need to define the alignement of fields to match julia's layout + float f3; // for the types used here the alignement matches though! 
+ }; + __kernel void structest(__global float *out, struct Test a){ + out[0] = a.f1.x; + out[1] = a.f1.y; + out[2] = a.f1.z; + out[3] = a.f3; + } + " + + ctx = cl.Context(device) + prg = cl.Program(ctx, source = test_source) + queue = cl.CmdQueue(ctx) + cl.build!(prg) + structkernel = cl.Kernel(prg, "structest") + out = cl.Buffer(Float32, ctx, 4, :w) + astruct = CLTestStruct((1f0, 2f0, 3f0), nothing, 22f0) + structkernel[queue, (1,)](out, astruct) + r = cl.read(queue, out) + @test r == [1f0, 2f0, 3f0, 22f0] + end +end diff --git a/test/test_memory.jl b/test/memory.jl similarity index 63% rename from test/test_memory.jl rename to test/memory.jl index 761afb49..dac798e6 100644 --- a/test/test_memory.jl +++ b/test/memory.jl @@ -1,5 +1,13 @@ -@testset "OpenCL.Memory" begin - @testset "OpenCL.CLMemObject context" begin +function create_test_buffer() + ctx = cl.Context(device) + queue = cl.CmdQueue(ctx) + testarray = zeros(Float32, 1000) + buf = cl.Buffer(Float32, ctx, length(testarray), (:rw, :copy), hostbuf=testarray) + return (queue, buf, testarray, ctx) +end + +@testset "Memory" begin + @testset "context" begin _, buf, _, expected = create_test_buffer() ctx = cl.context(buf) @@ -8,7 +16,7 @@ @test isequal(ctx, expected) != nothing end - @testset "OpenCL.CLMemObject properties" begin + @testset "properties" begin _, buf, _, _ = create_test_buffer() expectations = [ diff --git a/test/minver.jl b/test/minver.jl new file mode 100644 index 00000000..d598cd8b --- /dev/null +++ b/test/minver.jl @@ -0,0 +1,33 @@ +@testset "Minver" begin + @testset "platform" begin + version = cl.opencl_version(platform) + + v11 = cl.min_v11(platform) + v12 = cl.min_v12(platform) + v20 = cl.min_v20(platform) + v21 = cl.min_v21(platform) + v22 = cl.min_v22(platform) + + @test v11 == (version >= v"1.1") + @test v12 == (version >= v"1.2") + @test v20 == (version >= v"2.0") + @test v21 == (version >= v"2.1") + @test v22 == (version >= v"2.2") + end + + @testset "device" begin + version = 
cl.opencl_version(device) + + v11 = cl.min_v11(device) + v12 = cl.min_v12(device) + v20 = cl.min_v20(device) + v21 = cl.min_v21(device) + v22 = cl.min_v22(device) + + @test v11 == (version >= v"1.1") + @test v12 == (version >= v"1.2") + @test v20 == (version >= v"2.0") + @test v21 == (version >= v"2.1") + @test v22 == (version >= v"2.2") + end +end diff --git a/test/platform.jl b/test/platform.jl new file mode 100644 index 00000000..4974d58c --- /dev/null +++ b/test/platform.jl @@ -0,0 +1,32 @@ +@testset "Platform" begin + @testset "Info" begin + @test length(cl.platforms()) == cl.num_platforms() + + @test platform != nothing + @test pointer(platform) != C_NULL + for k in [:profile, :version, :name, :vendor, :extensions] + @test platform[k] == cl.info(platform, k) + end + v = cl.opencl_version(platform) + @test 1 <= v.major <= 3 + @test 0 <= v.minor <= 2 + end + + @testset "Equality" begin + platform = cl.platforms()[1] + platform_copy = cl.platforms()[1] + + @test pointer(platform) == pointer(platform_copy) + @test hash(platform) == hash(platform_copy) + @test isequal(platform, platform_copy) + + if length(cl.platforms()) > 1 + p1 = cl.platforms()[1] + for p2 in cl.platforms()[2:end] + @test pointer(p2) != pointer(p1) + @test hash(p2) != hash(p1) + @test !isequal(p2, p1) + end + end + end +end diff --git a/test/program.jl b/test/program.jl new file mode 100644 index 00000000..e72fd187 --- /dev/null +++ b/test/program.jl @@ -0,0 +1,78 @@ +@testset "Program" begin + test_source = " + __kernel void sum(__global const float *a, + __global const float *b, + __global float *c) + { + uint gid = get_global_id(0); + c[gid] = a[gid] + b[gid]; + } + " + + function create_test_program() + ctx = cl.Context(device) + cl.Program(ctx, source=test_source) + end + + @testset "source constructor" begin + ctx = cl.Context(device) + prg = cl.Program(ctx, source=test_source) + @test prg != nothing + end + @testset "info" begin + ctx = cl.Context(device) + prg = cl.Program(ctx,
source=test_source) + + @test prg[:context] == ctx + + @test typeof(prg[:devices]) == Vector{cl.Device} + @test length(prg[:devices]) > 0 + @test device in prg[:devices] + + @test typeof(prg[:source]) == String + @test prg[:source] == test_source + + @test prg[:reference_count] > 0 + @test isempty(strip(prg[:build_log][device])) + end + + @testset "build" begin + ctx = cl.Context(device) + prg = cl.Program(ctx, source=test_source) + @test cl.build!(prg) != nothing + + @test prg[:build_status][device] == cl.CL_BUILD_SUCCESS + @test prg[:build_log][device] isa String + end + + @testset "source code" begin + ctx = cl.Context(device) + prg = cl.Program(ctx, source=test_source) + @test prg[:source] == test_source + end + + if device[:platform][:name] == "Portable Computing Language" + @warn("Skipping unsupported binary build on POCL") + else + @testset "binaries" begin + ctx = cl.Context(device) + prg = cl.Program(ctx, source=test_source) |> cl.build! + + @test device in collect(keys(prg[:binaries])) + binaries = prg[:binaries] + @test device in collect(keys(binaries)) + @test binaries[device] != nothing + @test length(binaries[device]) > 0 + prg2 = cl.Program(ctx, binaries=binaries) + @test prg2[:binaries] == binaries + try + prg2[:source] + error("should not happen") + catch err + @test isa(err, cl.CLError) + @test err.code == -45 + @test err.desc == :CL_INVALID_PROGRAM_EXECUTABLE + end + end + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 028a0d8e..7e63c9e0 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,6 +3,23 @@ using Test using OpenCL using Base.GC +backend = get(ENV, "JULIA_OPENCL_BACKEND", "POCL") +if backend == "POCL" + # Use POCL for the tests + # XXX: support testing with other OpenCL implementations + using pocl_jll + platform = filter(cl.platforms()) do platform + cl.info(platform, :name) == "Portable Computing Language" + end |> first + device = first(cl.devices(platform, :cpu)) +else + platform = first(cl.platforms()) + 
device = first(cl.devices(platform)) +end +@info "Testing using $backend back-end" platform device + +@testset "OpenCL.jl" begin + @testset "layout" begin x = ((10f0, 1f0, 2f0), (10f0, 1f0, 2f0), (10f0, 1f0, 2f0)) clx = cl.replace_different_layout(x) @@ -13,31 +30,24 @@ using Base.GC @test clx == 0 # TODO should it be like this? end -function create_test_buffer() - ctx = cl.create_some_context() - queue = cl.CmdQueue(ctx) - testarray = zeros(Float32, 1000) - buf = cl.Buffer(Float32, ctx, (:rw, :copy), hostbuf=testarray) - return (queue, buf, testarray, ctx) -end - -include("test_platform.jl") -include("test_context.jl") -include("test_device.jl") -include("test_cmdqueue.jl") -include("test_minver.jl") -#TODO: fix test_event.jl -#include("test_event.jl") -include("test_program.jl") -include("test_kernel.jl") -include("test_behaviour.jl") -include("test_memory.jl") -include("test_buffer.jl") -include("test_array.jl") +include("platform.jl") +include("context.jl") +include("device.jl") +include("cmdqueue.jl") +include("minver.jl") +#include("event.jl") +include("program.jl") +include("kernel.jl") +include("behaviour.jl") +include("memory.jl") +include("buffer.jl") +include("array.jl") @testset "context jl reference counting" begin Base.GC.gc() @test isempty(cl._ctx_reference_count) end +end + end # module diff --git a/test/script.gdb b/test/script.gdb deleted file mode 100644 index ebee68ea..00000000 --- a/test/script.gdb +++ /dev/null @@ -1,3 +0,0 @@ -set env MALLOC_CHECK_=3 -run -backtrace full diff --git a/test/test_array.jl b/test/test_array.jl deleted file mode 100644 index 4550bc6a..00000000 --- a/test/test_array.jl +++ /dev/null @@ -1,60 +0,0 @@ -import OpenCL.cl.CLArray - -using LinearAlgebra - -@testset "OpenCL.CLArray" begin - - @testset "OpenCL.CLArray constructors" begin - for device in cl.devices() - - ctx = cl.Context(device) - queue = cl.CmdQueue(ctx) - hostarray = zeros(Float32, 128*64) - A = CLArray(queue, hostarray) - - @test CLArray(queue, (:rw, 
:copy), hostarray) != nothing - - @test CLArray(queue, hostarray, flags=(:rw, :copy)) != nothing - - @test CLArray(queue, hostarray) != nothing - - @test CLArray(cl.Buffer(Float32, ctx, (:r, :copy), hostbuf=hostarray), - queue, - (128, 64)) != nothing - - @test copy(A) == A - end - end - - @testset "OpenCL.CLArray fill" begin - for device in cl.devices() - ctx = cl.Context(device) - queue = cl.CmdQueue(ctx) - - @test cl.to_host(cl.fill(Float32, queue, Float32(0.5), - 32, 64)) == fill(Float32(0.5), 32, 64) - @test cl.to_host(cl.zeros(Float32, queue, 64)) == zeros(Float32, 64) - @test cl.to_host(cl.ones(Float32, queue, 64)) == ones(Float32, 64) - end - end - - @testset "OpenCL.CLArray core functions" begin - for device in cl.devices() - ctx = cl.Context(device) - queue = cl.CmdQueue(ctx) - A = CLArray(queue, rand(Float32, 128, 64)) - @test size(A) == (128, 64) - @test ndims(A) == 2 - @test length(A) == 128*64 - # reshape - B = reshape(A, 128*64) - @test reshape(B, 128, 64) == A - # transpose - X = CLArray(queue, rand(Float32, 32, 32)) - B = cl.zeros(Float32, queue, 64, 128) - ev = transpose!(B, A) - cl.wait(ev) - #@test cl.to_host(copy(A')) == cl.to_host(B) - end - end -end diff --git a/test/test_behaviour.jl b/test/test_behaviour.jl deleted file mode 100644 index fc6e60dc..00000000 --- a/test/test_behaviour.jl +++ /dev/null @@ -1,328 +0,0 @@ -#= -info( -"====================================================================== - Running Behavior Tests - ======================================================================") -=# -@testset "OpenCL Hello World Test" begin - - hello_world_kernel = " - #pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable - - __constant char hw[] = \"hello world\"; - - __kernel void hello(__global char *out) { - int tid = get_global_id(0); - out[tid] = hw[tid]; - }" - - hello_world_str = "hello world" - - for device in cl.devices() - if device[:platform][:name] == "Portable Computing Language" - @warn("Skipping OpenCL.Kernel 
mem/workgroup size for Portable Computing Language Platform") - continue - end - - ctx = cl.Context(device) - queue = cl.CmdQueue(ctx) - - str_len = length(hello_world_str) + 1 - out_buf = cl.Buffer(Cchar, ctx, :w, sizeof(Cchar) * str_len) - - prg = cl.Program(ctx, source=hello_world_kernel) |> cl.build! - kern = cl.Kernel(prg, "hello") - - queue(kern, str_len, nothing, out_buf) - h = cl.read(queue, out_buf) - - @test cl.CLString(h) == hello_world_str - end -end - - -@testset "OpenCL Low Level Api Test" begin - - test_source = " - __kernel void sum(__global const float *a, - __global const float *b, - __global float *c, - const unsigned int count) - { - unsigned int gid = get_global_id(0); - if (gid < count) { - c[gid] = a[gid] + b[gid]; - } - } - " - - for device in cl.devices() - - len = 1024 - h_a = Vector{cl.CL_float}(undef, len) - h_b = Vector{cl.CL_float}(undef, len) - h_c = Vector{cl.CL_float}(undef, len) - h_d = Vector{cl.CL_float}(undef, len) - h_e = Vector{cl.CL_float}(undef, len) - h_f = Vector{cl.CL_float}(undef, len) - h_g = Vector{cl.CL_float}(undef, len) - - for i in 1:len - h_a[i] = cl.cl_float(rand()) - h_b[i] = cl.cl_float(rand()) - h_e[i] = cl.cl_float(rand()) - h_g[i] = cl.cl_float(rand()) - end - - err_code = Ref{cl.CL_int}() - - # create compute context (TODO: fails if function ptr's not passed...) 
- ctx_id = cl.api.clCreateContext(C_NULL, 1, [device.id], - C_NULL, - C_NULL, - err_code) - if err_code[] != cl.CL_SUCCESS - throw(cl.CLError(err_code[])) - end - - q_id = cl.api.clCreateCommandQueue(ctx_id, device.id, 0, err_code) - if err_code[] != cl.CL_SUCCESS - error("Failed to create command queue") - end - - # create program - bytesource = String(test_source) - prg_id = cl.api.clCreateProgramWithSource(ctx_id, 1, [bytesource], C_NULL, err_code) - if err_code[] != cl.CL_SUCCESS - error("Failed to create program") - end - - # build program - err = cl.api.clBuildProgram(prg_id, 0, C_NULL, C_NULL, C_NULL, C_NULL) - if err != cl.CL_SUCCESS - error("Failed to build program") - end - - # create compute kernel - k_id = cl.api.clCreateKernel(prg_id, "sum", err_code) - if err_code[] != cl.CL_SUCCESS - error("Failed to create compute kernel") - end - - # create input array in device memory - Aid = cl.api.clCreateBuffer(ctx_id, cl.CL_MEM_READ_ONLY | cl.CL_MEM_COPY_HOST_PTR, - sizeof(cl.CL_float) * len, h_a, err_code) - if err_code[] != cl.CL_SUCCESS - error("Error creating buffer A") - end - Bid = cl.api.clCreateBuffer(ctx_id, cl.CL_MEM_READ_ONLY | cl.CL_MEM_COPY_HOST_PTR, - sizeof(cl.CL_float) * len, h_b, err_code) - if err_code[] != cl.CL_SUCCESS - error("Error creating buffer B") - end - Eid = cl.api.clCreateBuffer(ctx_id, cl.CL_MEM_WRITE_ONLY | cl.CL_MEM_COPY_HOST_PTR, - sizeof(cl.CL_float) * len, h_e, err_code) - if err_code[] != cl.CL_SUCCESS - error("Error creating buffer E") - end - Gid = cl.api.clCreateBuffer(ctx_id, cl.CL_MEM_WRITE_ONLY | cl.CL_MEM_COPY_HOST_PTR, - sizeof(cl.CL_float) * len, h_g, err_code) - if err_code[] != cl.CL_SUCCESS - error("Error creating buffer G") - end - - # create output arrays in device memory - - Cid = cl.api.clCreateBuffer(ctx_id, cl.CL_MEM_READ_WRITE, - sizeof(cl.CL_float) * len, C_NULL, err_code) - if err_code[] != cl.CL_SUCCESS - error("Error creating buffer C") - end - Did = cl.api.clCreateBuffer(ctx_id, cl.CL_MEM_READ_WRITE, 
- sizeof(cl.CL_float) * len, C_NULL, err_code) - if err_code[] != cl.CL_SUCCESS - error("Error creating buffer D") - end - Fid = cl.api.clCreateBuffer(ctx_id, cl.CL_MEM_WRITE_ONLY, - sizeof(cl.CL_float) * len, C_NULL, err_code) - if err_code[] != cl.CL_SUCCESS - error("Error creating buffer F") - end - - err = cl.api.clSetKernelArg(k_id, 0, sizeof(cl.CL_mem), [Aid]) - err |= cl.api.clSetKernelArg(k_id, 1, sizeof(cl.CL_mem), [Bid]) - err |= cl.api.clSetKernelArg(k_id, 2, sizeof(cl.CL_mem), [Cid]) - err |= cl.api.clSetKernelArg(k_id, 3, sizeof(cl.CL_uint), cl.CL_uint[len]) - if err != cl.CL_SUCCESS - error("Error setting kernel 1 args") - end - - nglobal = Ref{Csize_t}(len) - err = cl.api.clEnqueueNDRangeKernel(q_id, k_id, 1, C_NULL, - nglobal, C_NULL, 0, C_NULL, C_NULL) - if err != cl.CL_SUCCESS - error("Failed to execute kernel 1") - end - - err = cl.api.clSetKernelArg(k_id, 0, sizeof(cl.CL_mem), [Eid]) - err |= cl.api.clSetKernelArg(k_id, 1, sizeof(cl.CL_mem), [Cid]) - err |= cl.api.clSetKernelArg(k_id, 2, sizeof(cl.CL_mem), [Did]) - if err != cl.CL_SUCCESS - error("Error setting kernel 2 args") - end - err = cl.api.clEnqueueNDRangeKernel(q_id, k_id, 1, C_NULL, - nglobal, C_NULL, 0, C_NULL, C_NULL) - if err != cl.CL_SUCCESS - error("Failed to execute kernel 2") - end - - err = cl.api.clSetKernelArg(k_id, 0, sizeof(cl.CL_mem), [Gid]) - err |= cl.api.clSetKernelArg(k_id, 1, sizeof(cl.CL_mem), [Did]) - err |= cl.api.clSetKernelArg(k_id, 2, sizeof(cl.CL_mem), [Fid]) - if err != cl.CL_SUCCESS - error("Error setting kernel 3 args") - end - err = cl.api.clEnqueueNDRangeKernel(q_id, k_id, 1, C_NULL, - nglobal, C_NULL, 0, C_NULL, C_NULL) - if err != cl.CL_SUCCESS - error("Failed to execute kernel 3") - end - - # read back the result from compute device... 
- err = cl.api.clEnqueueReadBuffer(q_id, Fid, cl.CL_TRUE, 0, - sizeof(cl.CL_float) * len, h_f, 0, C_NULL, C_NULL) - if err != cl.CL_SUCCESS - error("Failed to read output array") - end - - # test results - for i in 1:len - tmp = h_a[i] + h_b[i] + h_e[i] + h_g[i] - @test tmp ≈ h_f[i] - end - end -end - -struct Params - A::Float32 - B::Float32 - #TODO: fixed size arrays? - X1::Float32 - X2::Float32 - C::Int32 - Params(a, b, x, c) = begin - new(Float32(a), - Float32(b), - Float32(x[1]), - Float32(x[2]), - Int32(c)) - end -end - -let test_struct = " - typedef struct Params - { - float A; - float B; - float x[2]; //padding - int C; - } Params; - - - __kernel void part3(__global const float *a, - __global const float *b, - __global float *c, - __constant struct Params* test) - { - int gid = get_global_id(0); - c[gid] = test->A * a[gid] + test->B * b[gid] + test->C; - } -" - -@testset "OpenCL Struct Buffer Test" begin - for device in cl.devices() - - if device[:platform][:name] == "Portable Computing Language" - @warn("Skipping OpenCL Struct Buffer Test for Portable Computing Language Platform") - continue - end - - ctx = cl.Context(device) - q = cl.CmdQueue(ctx) - p = cl.Program(ctx, source=test_struct) |> cl.build! 
- - part3 = cl.Kernel(p, "part3") - - X = fill(1f0, 10) - Y = fill(1f0, 10) - - P = [Params(0.5, 10.0, [0.0, 0.0], 3)] - - #TODO: constructor for single immutable types.., check if passed parameter isbits - P_buf = cl.Buffer(Params, ctx, :r, length(P)) - cl.write!(q, P_buf, P) - - X_buf = cl.Buffer(Float32, ctx, (:r, :copy), hostbuf=X) - Y_buf = cl.Buffer(Float32, ctx, (:r, :copy), hostbuf=Y) - R_buf = cl.Buffer(Float32, ctx, :w, length(X)) - - global_size = size(X) - q(part3, global_size, nothing, X_buf, Y_buf, R_buf, P_buf) - - r = cl.read(q, R_buf) - @test all(x -> x == 13.5, r) - end -end - -end - -mutable struct MutableParams - A::Float32 - B::Float32 -end - - -let test_mutable_pointerfree = " - typedef struct Params - { - float A; - float B; - } Params; - - - __kernel void part3( - __global float *a, - Params test - ){ - a[0] = test.A; - a[1] = test.B; - } -" - - -@testset "OpenCL Struct Buffer Test" begin - for device in cl.devices() - - if device[:platform][:name] == "Portable Computing Language" - @warn("Skipping OpenCL Struct Buffer Test for Portable Computing Language Platform") - continue - end - - ctx = cl.Context(device) - q = cl.CmdQueue(ctx) - p = cl.Program(ctx, source=test_mutable_pointerfree) |> cl.build! 
- - part3 = cl.Kernel(p, "part3") - - P = MutableParams(0.5, 10.0) - P_buf = cl.Buffer(Float32, ctx, :w, 2) - q(part3, 1, nothing, P_buf, P) - - r = cl.read(q, P_buf) - - @test r[1] == 0.5 - @test r[2] == 10.0 - end -end - -end diff --git a/test/test_buffer.jl b/test/test_buffer.jl deleted file mode 100644 index f3b425d8..00000000 --- a/test/test_buffer.jl +++ /dev/null @@ -1,224 +0,0 @@ -using Base.GC - -struct TestStruct - a::cl.CL_int - b::cl.CL_float -end - -@testset "OpenCL.Buffer" begin - @testset "OpenCL.Buffer constructors" begin - for device in cl.devices() - - ctx = cl.Context(device) - testarray = zeros(Float32, 1000) - - @test cl.Buffer(Float32, ctx, cl.CL_MEM_ALLOC_HOST_PTR | cl.CL_MEM_READ_ONLY, - length(testarray)) != nothing - - @test cl.Buffer(Float32, ctx, cl.CL_MEM_ALLOC_HOST_PTR | cl.CL_MEM_WRITE_ONLY, - length(testarray)) != nothing - - @test cl.Buffer(Float32, ctx, cl.CL_MEM_ALLOC_HOST_PTR | cl.CL_MEM_READ_WRITE, - length(testarray)) != nothing - - buf = cl.Buffer(Float32, ctx, cl.CL_MEM_ALLOC_HOST_PTR | cl.CL_MEM_READ_WRITE, length(testarray)) - @test length(buf) == length(testarray) - - @test cl.Buffer(Float32, ctx, cl.CL_MEM_COPY_HOST_PTR | cl.CL_MEM_READ_ONLY, - hostbuf=testarray) != nothing - - @test cl.Buffer(Float32, ctx, cl.CL_MEM_COPY_HOST_PTR | cl.CL_MEM_WRITE_ONLY, - hostbuf=testarray) != nothing - - @test cl.Buffer(Float32, ctx, cl.CL_MEM_COPY_HOST_PTR | cl.CL_MEM_READ_WRITE, - hostbuf=testarray) != nothing - - buf = cl.Buffer(Float32, ctx, cl.CL_MEM_COPY_HOST_PTR | cl.CL_MEM_READ_WRITE, hostbuf=testarray) - @test length(buf) == length(testarray) - - @test cl.Buffer(Float32, ctx, cl.CL_MEM_USE_HOST_PTR | cl.CL_MEM_READ_ONLY, - hostbuf=testarray) != nothing - - @test cl.Buffer(Float32, ctx, cl.CL_MEM_USE_HOST_PTR | cl.CL_MEM_WRITE_ONLY, - hostbuf=testarray) != nothing - - @test cl.Buffer(Float32, ctx, cl.CL_MEM_USE_HOST_PTR | cl.CL_MEM_READ_WRITE, - hostbuf=testarray) != nothing - - buf = cl.Buffer(Float32, ctx, 
cl.CL_MEM_USE_HOST_PTR | cl.CL_MEM_READ_WRITE, hostbuf=testarray) - @test length(buf) == length(testarray) - - # invalid buffer size should throw error - @test_throws cl.CLError cl.Buffer(Float32, ctx, cl.CL_MEM_ALLOC_HOST_PTR, +0) - @test_throws InexactError cl.Buffer(Float32, ctx, cl.CL_MEM_ALLOC_HOST_PTR, -1) - - # invalid flag combinations should throw error - @test_throws cl.CLError cl.Buffer(Float32, ctx, cl.CL_MEM_USE_HOST_PTR | cl.CL_MEM_ALLOC_HOST_PTR, - hostbuf=testarray) - - # invalid host pointer should throw error - @test_throws TypeError cl.Buffer(Float32, ctx, cl.CL_MEM_COPY_HOST_PTR, - hostbuf=C_NULL) - - @test_throws TypeError cl.Buffer(Float32, ctx, cl.CL_MEM_USE_HOST_PTR, - hostbuf=C_NULL) - end - end - - @testset "OpenCL.Buffer constructors symbols" begin - for device in cl.devices() - ctx = cl.Context(device) - - for mf1 in [:rw, :r, :w] - for mf2 in [:copy, :use, :alloc, :null] - for mtype in [cl.CL_char, - cl.CL_uchar, - cl.CL_short, - cl.CL_ushort, - cl.CL_int, - cl.CL_uint, - cl.CL_long, - cl.CL_ulong, - cl.CL_half, - cl.CL_float, - cl.CL_double, - #TODO: bool, vector_types, struct_types... 
- ] - testarray = zeros(mtype, 100) - if mf2 == :copy || mf2 == :use - @test cl.Buffer(mtype, ctx, (mf1, mf2), hostbuf=testarray) != nothing - buf = cl.Buffer(mtype, ctx, (mf1, mf2), hostbuf=testarray) - @test length(buf) == length(testarray) - elseif mf2 == :alloc - @test cl.Buffer(mtype, ctx, (mf1, mf2), - length(testarray)) != nothing - buf = cl.Buffer(mtype, ctx, (mf1, mf2), length(testarray)) - @test length(buf) == length(testarray) - end - end - end - end - - test_array = Vector{TestStruct}(undef, 100) - @test cl.Buffer(TestStruct, ctx, :alloc, length(test_array)) != nothing - @test cl.Buffer(TestStruct, ctx, :copy, hostbuf=test_array) != nothing - - # invalid buffer size should throw error - @test_throws cl.CLError cl.Buffer(Float32, ctx, :alloc, +0) - @test_throws InexactError cl.Buffer(Float32, ctx, :alloc, -1) - - # invalid flag combinations should throw error - @test_throws ArgumentError cl.Buffer(Float32, ctx, (:use, :alloc), hostbuf=test_array) - - # invalid host pointer should throw error - @test_throws TypeError cl.Buffer(Float32, ctx, :copy, hostbuf=C_NULL) - - @test_throws TypeError cl.Buffer(Float32, ctx, :use, hostbuf=C_NULL) - - end - end - - @testset "OpenCL.Buffer fill" begin - for device in cl.devices() - if occursin("Portable", device[:platform][:name]) - # the pocl platform claims to implement v1.2 of the spec, but does not - @warn("Skipping test OpenCL.Buffer fill for POCL Platform") - continue - end - ctx = cl.Context(device) - queue = cl.CmdQueue(ctx) - testarray = zeros(Float32, 1000) - buf = cl.Buffer(Float32, ctx, (:rw, :copy), hostbuf=testarray) - @test length(buf) == length(testarray) - - v = cl.opencl_version(device) - if v.major == 1 && v.minor < 2 - platform_name = device[:platform][:name] - @info("Skipping OpenCL.Buffer fill for $platform_name: fill is a v1.2 command") - continue - end - cl.fill!(queue, buf, 1f0) - readback = cl.read(queue, buf) - @test all(x -> x == 1.0, readback) - @test all(x -> x == 0.0, testarray) - @test 
buf.valid == true - end - end - - @testset "OpenCL.Buffer write!" begin - for device in cl.devices() - ctx = cl.Context(device) - queue = cl.CmdQueue(ctx) - testarray = zeros(Float32, 1000) - buf = cl.Buffer(Float32, ctx, (:rw, :copy), hostbuf=testarray) - @test length(buf) == length(testarray) - cl.write!(queue, buf, ones(Float32, length(testarray))) - readback = cl.read(queue, buf) - @test all(x -> x == 1.0, readback) == true - @test buf.valid == true - end - end - - @testset "OpenCL.Buffer empty_like" begin - for device in cl.devices() - ctx = cl.Context(device) - queue = cl.CmdQueue(ctx) - testarray = zeros(Float32, 1000) - buf = cl.Buffer(Float32, ctx, (:rw, :copy), hostbuf=testarray) - - @test sizeof(cl.empty_like(ctx, buf)) == sizeof(testarray) - end - end - - @testset "OpenCL.Buffer copy!" begin - for device in cl.devices() - ctx = cl.Context(device) - queue = cl.CmdQueue(ctx) - test_array = fill(2f0, 1000) - a_buf = cl.Buffer(Float32, ctx, length(test_array)) - b_buf = cl.Buffer(Float32, ctx, length(test_array)) - c_arr = Vector{Float32}(undef, length(test_array)) - # host to device buffer - cl.copy!(queue, a_buf, test_array) - # device buffer to device buffer - cl.copy!(queue, b_buf, a_buf) - # device buffer to host - cl.copy!(queue, c_arr, b_buf) - @test all(x -> isapprox(x, 2.0), c_arr) == true - end - end - - @testset "OpenCL.Buffer map/unmap" begin - for device in cl.devices() - ctx = cl.Context(device) - queue = cl.CmdQueue(ctx) - b = cl.Buffer(Float32, ctx, :rw, 100) - for f in (:r, :w, :rw) - a, evt = cl.enqueue_map_mem(queue, b, f, 0, (10,10)) - cl.wait(evt) - @test size(a) == (10,10) - @test typeof(a) == Array{Float32,2} - - # cannot unmap a buffer without same host array - bad = similar(a) - @test_throws ArgumentError cl.unmap!(queue, b, bad) - - @test cl.ismapped(b) == true - cl.unmap!(queue, b, a) - @test cl.ismapped(b) == false - - # cannot unmap an unmapped buffer - @test_throws ArgumentError cl.unmap!(queue, b, a) - - # gc here quickly 
force any memory errors - Base.GC.gc() - end - @test cl.ismapped(b) == false - a, evt = cl.enqueue_map_mem(queue, b, :rw, 0, (10,10)) - @test cl.ismapped(b) == true - evt = cl.enqueue_unmap_mem(queue, b, a, wait_for=evt) - cl.wait(evt) - @test cl.ismapped(b) == false - end - end -end diff --git a/test/test_cmdqueue.jl b/test/test_cmdqueue.jl deleted file mode 100644 index 1b4caf7a..00000000 --- a/test/test_cmdqueue.jl +++ /dev/null @@ -1,47 +0,0 @@ -@testset "OpenCL.CmdQueue" begin - @testset "OpenCL.CmdQueue constructor" begin - has_warned = false - @test_throws MethodError cl.CmdQueue(nothing, nothing) - for platform in cl.platforms() - for device in cl.devices(platform) - ctx = cl.Context(device) - @test cl.CmdQueue(ctx) != nothing - @test cl.CmdQueue(ctx, device) != nothing - @test cl.CmdQueue(ctx, :profile) != nothing - try - cl.CmdQueue(ctx, device, :out_of_order) - cl.CmdQueue(ctx, device, (:profile, :out_of_order)) - catch err - if !has_warned - @warn("Platform $(device[:platform][:name]) does not seem to " * - "suport out of order queues: \n$err") - has_warned = true - end - end - @test_throws ArgumentError cl.CmdQueue(ctx, device, :unrecognized_flag) - for flag in [:profile, :out_of_order] - @test_throws ArgumentError cl.CmdQueue(ctx, (flag, :unrecognized_flag)) - @test_throws ArgumentError cl.CmdQueue(ctx, device, (:unrecognized_flag, flag)) - @test_throws ArgumentError cl.CmdQueue(ctx, (flag, flag)) - @test_throws ArgumentError cl.CmdQueue(ctx, device, (flag, flag)) - end - end - end - end - - @testset "OpenCL.CmdQueue info" begin - for platform in cl.platforms() - for device in cl.devices(platform) - ctx = cl.Context(device) - q1 = cl.CmdQueue(ctx) - q2 = cl.CmdQueue(ctx, device) - for q in (q1, q2) - @test q[:context] == ctx - @test q[:device] == device - @test q[:reference_count] > 0 - @test typeof(q[:properties]) == cl.CL_command_queue_properties - end - end - end - end -end diff --git a/test/test_context.jl b/test/test_context.jl deleted file mode 
100644 index 44b67bce..00000000 --- a/test/test_context.jl +++ /dev/null @@ -1,123 +0,0 @@ - -function context_test_callback(arg1, arg2, arg3) - # We're not really testing it because, nvidia doesn't seem to care about this functionality: - # https://devtalk.nvidia.com/default/topic/497433/context-callback-never-called/ - OpenCL.cl.log_error("Callback works") - return -end - -function create_context_error(ctx) - empty_kernel = " - __kernel void test() { - int c = 1 + 1; - };" - try - p = cl.Program(ctx, source = empty_kernel) |> cl.build! - k = cl.Kernel(p, "test") - q = cl.CmdQueue(ctx) - q(k, 1, 10000000) - catch - end -end - - -@testset "OpenCL.Context" begin - @testset "OpenCL.Context constructor" begin - @test_throws MethodError (cl.Context([])) - for platform in cl.platforms() - for device in cl.devices(platform) - ctx = cl.Context(device) - @test ctx != nothing - ctx_id = ctx.id - ctx2 = cl.Context(ctx_id) - @test cl.is_ctx_id_alive(ctx_id) - @test ctx.id != C_NULL - @test ctx2.id != C_NULL - finalize(ctx) - @test ctx.id == C_NULL - @test ctx2.id != C_NULL - @test cl.is_ctx_id_alive(ctx_id) - finalize(ctx2) - @test ctx.id == C_NULL - @test ctx2.id == C_NULL - # jeez, this segfaults... WHY? I suspect a driver bug for refcount == 0? 
- # NVIDIA 381.22 - #@test !cl.is_ctx_id_alive(ctx_id) - @testset "Context callback" begin - ctx = cl.Context(device, callback = context_test_callback) - create_context_error(ctx) - end - end - end - end - - @testset "OpenCL.Context platform properties" begin - for platform in cl.platforms() - try - cl.Context(cl.CL_DEVICE_TYPE_CPU) - catch err - @test typeof(err) == cl.CLError - # CL_DEVICE_NOT_FOUND could be throw for GPU only drivers - @test err.desc in (:CL_INVALID_PLATFORM, - :CL_DEVICE_NOT_FOUND) - end - - if platform[:name] == "Portable Computing Language" - @warn("Skipping OpenCL.Context platform properties for " * - "Portable Computing Language Platform") - continue - end - - properties = [(cl.CL_CONTEXT_PLATFORM, platform)] - for (cl_dev_type, sym_dev_type) in [(cl.CL_DEVICE_TYPE_CPU, :cpu), - (cl.CL_DEVICE_TYPE_GPU, :gpu)] - if !cl.has_device_type(platform, sym_dev_type) - continue - end - @test cl.Context(sym_dev_type, properties=properties) != nothing - @test cl.Context(cl_dev_type, properties=properties) != nothing - ctx = cl.Context(cl_dev_type, properties=properties) - @test isempty(cl.properties(ctx)) == false - test_properties = cl.properties(ctx) - - @test test_properties == properties - - platform_in_properties = false - for (t, v) in test_properties - if t == cl.CL_CONTEXT_PLATFORM - @test v[:name] == platform[:name] - @test v == platform - platform_in_properties = true - break - end - end - @test platform_in_properties - end - try - ctx2 = cl.Context(cl.CL_DEVICE_TYPE_ACCELERATOR, - properties=properties) - catch err - @test typeof(err) == cl.CLError - @test err.desc == :CL_DEVICE_NOT_FOUND - end - end - end - - @testset "OpenCL.Context create_some_context" begin - @test cl.create_some_context() != nothing - @test typeof(cl.create_some_context()) == cl.Context - end - - @testset "OpenCL.Context parsing" begin - for platform in cl.platforms() - properties = [(cl.CL_CONTEXT_PLATFORM, platform)] - parsed_properties = 
cl._parse_properties(properties) - - @test isodd(length(parsed_properties)) - @test parsed_properties[end] == 0 - @test parsed_properties[1] == cl.cl_context_properties(cl.CL_CONTEXT_PLATFORM) - @test parsed_properties[2] == cl.cl_context_properties(platform.id) - end - end - -end diff --git a/test/test_device.jl b/test/test_device.jl deleted file mode 100644 index bd99a5cc..00000000 --- a/test/test_device.jl +++ /dev/null @@ -1,99 +0,0 @@ -@testset "OpenCL.Device" begin - @testset "Device Type" begin - for p in cl.platforms() - for (t, k) in zip((cl.CL_DEVICE_TYPE_GPU, cl.CL_DEVICE_TYPE_CPU, - cl.CL_DEVICE_TYPE_ACCELERATOR, cl.CL_DEVICE_TYPE_ALL), - (:gpu, :cpu, :accelerator, :all)) - - #for (dk, dt) in zip(cl.devices(p, k), cl.devices(p, t)) - # @fact dk == dt --> true - #end - #devices = cl.devices(p, k) - #for d in devices - # @fact d[:device_type] == t --> true - #end - end - end - end - - @testset "Device Equality" begin - for platform in cl.platforms() - devices = cl.devices(platform) - if length(devices) > 1 - test_dev = devices[1] - for dev in devices[2:end] - @test pointer(dev) != pointer(test_dev) - @test hash(dev) != hash(test_dev) - @test isequal(dev, test_dev) == false - end - end - end - - end - - @testset "Device Info" begin - device_info_keys = Symbol[ - :driver_version, - :version, - :extensions, - :platform, - :name, - :device_type, - :has_image_support, - :queue_properties, - :has_queue_out_of_order_exec, - :has_queue_profiling, - :has_native_kernel, - :vendor_id, - :max_compute_units, - :max_work_item_size, - :max_clock_frequency, - :address_bits, - :max_read_image_args, - :max_write_image_args, - :global_mem_size, - :max_mem_alloc_size, - :max_const_buffer_size, - :local_mem_size, - :has_local_mem, - :host_unified_memory, - :available, - :compiler_available, - :max_work_group_size, - :max_parameter_size, - :profiling_timer_resolution, - :max_image2d_shape, - :max_image3d_shape, - ] - for p in cl.platforms() - if occursin("Portable", p[:name]) 
- msg = "Skipping Device Info tests for Portable Computing Language Platform " - @warn(msg) - continue - end - @test isa(p, cl.Platform) - @test_throws ArgumentError p[:zjdlkf] - for d in cl.devices(p) - @test isa(d, cl.Device) - @test_throws ArgumentError d[:zjdlkf] - for k in device_info_keys - @test d[k] == cl.info(d, k) - if k == :extensions - @test isa(d[k], Array) - if length(d[k]) > 0 - @test isa(d[k], Array{String, 1}) - end - elseif k == :platform - @test d[k] == p - elseif k == :max_work_item_sizes - @test length(d[k]) == 3 - elseif k == :max_image2d_shape - @test length(d[k]) == 2 - elseif k == :max_image3d_shape - @test length(d[k]) == 3 - end - end - end - end - end -end diff --git a/test/test_event.jl b/test/test_event.jl deleted file mode 100644 index 1cd15b97..00000000 --- a/test/test_event.jl +++ /dev/null @@ -1,107 +0,0 @@ -@testset "OpenCL.Event" begin - @testset "OpenCL.Event status" begin - for platform in cl.platforms() - if occursin("Portable", platform[:name]) - msg = "$(platform[:name]) does not implement User Events" - @warn(msg) - continue - end - - for device in cl.devices(platform) - ctx = cl.Context(device) - evt = cl.UserEvent(ctx) - evt[:status] - @test evt[:status] == :submitted - cl.complete(evt) - @test evt[:status] == :complete - finalize(evt) - end - end - end - - @testset "OpenCL.Event wait" begin - for platform in cl.platforms() - if occursin("Portable", platform[:name]) || - occursin("Intel Gen OCL", platform[:name]) - msg = "$(platform[:name]) does not implement User Events or shows other problems" - @warn(msg) - continue - end - - for device in cl.devices(platform) - ctx = cl.Context(device) - # create user event - usr_evt = cl.UserEvent(ctx) - q = cl.CmdQueue(ctx) - cl.enqueue_wait_for_events(q, usr_evt) - - # create marker event - mkr_evt = cl.enqueue_marker(q) - - @test usr_evt[:status] == :submitted - @test mkr_evt[:status] in (:queued, :submitted) - - cl.complete(usr_evt) - @test usr_evt[:status] == :complete - - 
cl.wait(mkr_evt) - @test mkr_evt[:status] == :complete - - @test cl.cl_event_status(:running) == cl.CL_RUNNING - @test cl.cl_event_status(:submitted) == cl.CL_SUBMITTED - @test cl.cl_event_status(:queued) == cl.CL_QUEUED - @test cl.cl_event_status(:complete) == cl.CL_COMPLETE - end - end - end - - @testset "OpenCL.Event callback" begin - for platform in cl.platforms() - v = cl.opencl_version(platform) - if v.major == 1 && v.minor < 1 - info("Skipping OpenCL.Event callback for $(platform[:name]) version < 1.1") - continue - end - - if occursin("Portable", platform[:name]) || - occursin("Intel Gen OCL", platform[:name]) - msg = "$(platform[:name]) does not implement User Events or shows other problems." - @warn(msg) - continue - end - - for device in cl.devices(platform) - global callback_called = Ref(false) - - function test_callback(evt, status) - callback_called[] = true - end - - ctx = cl.Context(device) - usr_evt = cl.UserEvent(ctx) - queue = cl.CmdQueue(ctx) - - cl.enqueue_wait_for_events(queue, usr_evt) - - mkr_evt = cl.enqueue_marker(queue) - cl.add_callback(mkr_evt, test_callback) - - @test usr_evt[:status] == :submitted - @test mkr_evt[:status] in (:queued, :submitted) - @test !callback_called[] - - cl.complete(usr_evt) - @test usr_evt[:status] == :complete - - cl.wait(mkr_evt) - - # Give callback some time to finish - yield() - sleep(0.5) - - @test mkr_evt[:status] == :complete - @test callback_called[] - end - end - end -end diff --git a/test/test_kernel.jl b/test/test_kernel.jl deleted file mode 100644 index 4f3893f9..00000000 --- a/test/test_kernel.jl +++ /dev/null @@ -1,257 +0,0 @@ -struct CLTestStruct - f1::NTuple{3, Float32} - f2::Nothing - f3::Float32 -end - -@testset "OpenCL.Kernel" begin - - test_source = " - __kernel void sum(__global const float *a, - __global const float *b, - __global float *c, - const unsigned int count) - { - unsigned int gid = get_global_id(0); - if (gid < count) { - c[gid] = a[gid] + b[gid]; - } - } - " - - #TODO: tests 
for invalid kernel build error && logs... - - @testset "OpenCL.Kernel constructor" begin - for device in cl.devices() - if device[:platform][:name] == "Portable Computing Language" - @warn("Skipping OpenCL.Kernel constructor for " * - "Portable Computing Language Platform") - continue - end - ctx = cl.Context(device) - prg = cl.Program(ctx, source=test_source) - @test_throws ArgumentError cl.Kernel(prg, "sum") - cl.build!(prg) - @test cl.Kernel(prg, "sum") != nothing - end - end - - @testset "OpenCL.Kernel info" begin - for device in cl.devices() - if device[:platform][:name] == "Portable Computing Language" - @warn("Skipping OpenCL.Kernel info for Portable Computing Language Platform") - continue - end - ctx = cl.Context(device) - prg = cl.Program(ctx, source=test_source) - cl.build!(prg) - k = cl.Kernel(prg, "sum") - @test k[:name] == "sum" - @test k[:num_args] == 4 - @test k[:reference_count] > 0 - @test k[:program] == prg - @test typeof(k[:attributes]) == String - end - end - - @testset "OpenCL.Kernel mem/workgroup size" begin - for device in cl.devices() - if device[:platform][:name] == "Portable Computing Language" - @warn("Skipping OpenCL.Kernel mem/workgroup size for Portable Computing Language Platform") - continue - end - ctx = cl.Context(device) - prg = cl.Program(ctx, source=test_source) - cl.build!(prg) - k = cl.Kernel(prg, "sum") - for (sf, clf) in [(:size, cl.CL_KERNEL_WORK_GROUP_SIZE), - (:compile_size, cl.CL_KERNEL_COMPILE_WORK_GROUP_SIZE), - (:local_mem_size, cl.CL_KERNEL_LOCAL_MEM_SIZE), - (:private_mem_size, cl.CL_KERNEL_PRIVATE_MEM_SIZE), - (:prefered_size_multiple, cl.CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE)] - @test cl.work_group_info(k, sf, device) != nothing - @test cl.work_group_info(k, clf, device) != nothing - if sf != :compile_size - @test cl.work_group_info(k, sf, device) == cl.work_group_info(k, clf, device) - end - end - end - end - - @testset "OpenCL.Kernel set_arg!/set_args!" 
begin - for device in cl.devices() - - if device[:platform][:name] == "Portable Computing Language" - @warn("Skipping OpenCL.Kernel mem/workgroup size for Portable Computing Language Platform") - continue - end - - ctx = cl.Context(device) - queue = cl.CmdQueue(ctx) - - prg = cl.Program(ctx, source=test_source) |> cl.build! - k = cl.Kernel(prg, "sum") - - count = 1024 - nbytes = count * sizeof(Float32) - - h_ones = ones(Float32, count) - - A = cl.Buffer(Float32, ctx, (:r, :copy), hostbuf=h_ones) - B = cl.Buffer(Float32, ctx, (:r, :copy), hostbuf=h_ones) - C = cl.Buffer(Float32, ctx, :w, count) - - # sizeof mem object for buffer in bytes - @test sizeof(A) == nbytes - @test sizeof(B) == nbytes - @test sizeof(C) == nbytes - - # we use julia's index by one convention - @test cl.set_arg!(k, 1, A) != nothing - @test cl.set_arg!(k, 2, B) != nothing - @test cl.set_arg!(k, 3, C) != nothing - @test cl.set_arg!(k, 4, UInt32(count)) != nothing - - cl.enqueue_kernel(queue, k, count) |> cl.wait - r = cl.read(queue, C) - - @test all(x -> x == 2.0, r) - cl.flush(queue) - - # test set_args with new kernel - k2 = cl.Kernel(prg, "sum") - cl.set_args!(k2, A, B, C, UInt32(count)) - - h_twos = fill(2f0, count) - cl.copy!(queue, A, h_twos) - cl.copy!(queue, B, h_twos) - - #TODO: check for ocl version, fill is opencl v1.2 - #cl.enqueue_fill(queue, A, 2f0) - #cl.enqueue_fill(queue, B, 2f0) - - cl.enqueue_kernel(queue, k, count) - cl.finish(queue) - - r = cl.read(queue, C) - - @test all(x -> x == 4.0, r) - end - end - - @testset "OpenCL.Kernel enqueue_kernel" begin - for device in cl.devices() - if device[:platform][:name] == "Portable Computing Language" - @warn("Skipping OpenCL.Kernel mem/workgroup size for Portable Computing Language Platform") - continue - end - - simple_kernel = " - __kernel void test(__global float *i) { - *i += 1; - };" - - ctx = cl.Context(device) - - h_buff = Float32[1,] - d_buff = cl.Buffer(Float32, ctx, (:rw, :copy), hostbuf=h_buff) - - p = cl.Program(ctx, 
source=simple_kernel) |> cl.build! - k = cl.Kernel(p, "test") - q = cl.CmdQueue(ctx) - - # dimensions must be the same size - @test_throws ArgumentError q(k, (1,), (1,1), d_buff) - @test_throws ArgumentError q(k, (1,1), (1,), d_buff) - - # dimensions are bounded - max_work_dim = device[:max_work_item_dims] - bad = tuple([1 for _ in 1:(max_work_dim + 1)]) - @test_throws MethodError q(k, bad, d_buff) - - # devices have finite work sizes - @test_throws MethodError q(k, (typemax(Int),), d_buff) - - # blocking call to kernel finishes cmd queue - q(k, 1, 1, d_buff) - - r = cl.read(q, d_buff) - @test r[1] == 2 - - # alternative kernel call syntax - k[q, (1,), (1,)](d_buff) - r = cl.read(q, d_buff) - @test r[1] == 3 - - # enqueue task is an alias for calling - # a kernel with a global/local size of 1 - evt = cl.enqueue_task(q, k) - r = cl.read(q, d_buff) - @test r[1] == 4 - end - end - - - test_source = " - struct __attribute__((packed)) Test2{ - long f1; - int __attribute__((aligned (8))) f2; - }; - __kernel void structest(__global float *out, struct Test2 b){ - out[0] = b.f1; - out[1] = b.f2; - } - " - for device in cl.devices() - if device[:platform][:name] == "Portable Computing Language" - @warn("Skipping OpenCL.Kernel constructor for " * - "Portable Computing Language Platform") - continue - end - Sys.isapple() && continue - ctx = cl.Context(device) - prg = cl.Program(ctx, source = test_source) - queue = cl.CmdQueue(ctx) - cl.build!(prg) - structkernel = cl.Kernel(prg, "structest") - out = cl.Buffer(Float32, ctx, :w, 2) - bstruct = (1, Int32(4)) - structkernel[queue, (1,)](out, bstruct) - r = cl.read(queue, out) - @test r == [1f0, 4f0] - end - - test_source = " - //packed - struct __attribute__((packed)) Test{ - float3 f1; - int f2; // empty type gets replaced with Int32 (no empty types allowed in OpenCL) - // you might need to define the alignement of fields to match julia's layout - float f3; // for the types used here the alignement matches though! 
- }; - __kernel void structest(__global float *out, struct Test a){ - out[0] = a.f1.x; - out[1] = a.f1.y; - out[2] = a.f1.z; - out[3] = a.f3; - } - " - - for device in cl.devices() - if device[:platform][:name] == "Portable Computing Language" - @warn("Skipping OpenCL.Kernel constructor for " * - "Portable Computing Language Platform") - continue - end - ctx = cl.Context(device) - prg = cl.Program(ctx, source = test_source) - queue = cl.CmdQueue(ctx) - cl.build!(prg) - structkernel = cl.Kernel(prg, "structest") - out = cl.Buffer(Float32, ctx, :w, 4) - astruct = CLTestStruct((1f0, 2f0, 3f0), nothing, 22f0) - structkernel[queue, (1,)](out, astruct) - r = cl.read(queue, out) - @test r == [1f0, 2f0, 3f0, 22f0] - end -end diff --git a/test/test_minver.jl b/test/test_minver.jl deleted file mode 100644 index b02e60aa..00000000 --- a/test/test_minver.jl +++ /dev/null @@ -1,40 +0,0 @@ -@testset "OpenCL.Minver" begin - @testset "OpenCL.Minver platform" begin - for platform in cl.platforms() - - version = cl.opencl_version(platform) - - v11 = cl.min_v11(platform) - v12 = cl.min_v12(platform) - v20 = cl.min_v20(platform) - v21 = cl.min_v21(platform) - v22 = cl.min_v22(platform) - - @test v11 == (version >= v"1.1") - @test v12 == (version >= v"1.2") - @test v20 == (version >= v"2.0") - @test v21 == (version >= v"2.1") - @test v22 == (version >= v"2.2") - end - end - - @testset "OpenCL.Minver device" begin - for platform in cl.platforms() - for device in cl.devices(platform) - version = cl.opencl_version(device) - - v11 = cl.min_v11(device) - v12 = cl.min_v12(device) - v20 = cl.min_v20(device) - v21 = cl.min_v21(device) - v22 = cl.min_v22(device) - - @test v11 == (version >= v"1.1") - @test v12 == (version >= v"1.2") - @test v20 == (version >= v"2.0") - @test v21 == (version >= v"2.1") - @test v22 == (version >= v"2.2") - end - end - end -end diff --git a/test/test_platform.jl b/test/test_platform.jl deleted file mode 100644 index ff95740c..00000000 --- a/test/test_platform.jl 
+++ /dev/null @@ -1,34 +0,0 @@ -@testset "OpenCL.Platform" begin - @testset "Platform Info" begin - @test length(cl.platforms()) == cl.num_platforms() - for p in cl.platforms() - @testset "Platform $(p[:profile])" begin - @test p != nothing - @test pointer(p) != C_NULL - for k in [:profile, :version, :name, :vendor, :extensions] - @test p[k] == cl.info(p, k) - end - v = cl.opencl_version(p) - @test 1 <= v.major <= 3 - @test 0 <= v.minor <= 2 - end - end - end - - @testset "Platform Equality" begin - platform = cl.platforms()[1] - platform_copy = cl.platforms()[1] - - @test pointer(platform) == pointer(platform_copy) - @test hash(platform) == hash(platform_copy) - @test isequal(platform, platform) - - if length(cl.platforms()) > 1 - for p in cl.platforms()[2:end] - @test pointer(platform) != pointer(p) - @test hash(platform) != hash(p) - @test !isequal(platform, p) - end - end - end -end diff --git a/test/test_program.jl b/test/test_program.jl deleted file mode 100644 index 818f6dc2..00000000 --- a/test/test_program.jl +++ /dev/null @@ -1,99 +0,0 @@ -@testset "OpenCL.Program" begin - - test_source = " - __kernel void sum(__global const float *a, - __global const float *b, - __global float *c) - { - uint gid = get_global_id(0); - c[gid] = a[gid] + b[gid]; - } - " - - function create_test_program() - ctx = cl.create_some_context() - cl.Program(ctx, source=test_source) - end - - @testset "OpenCL.Program source constructor" begin - for device in cl.devices() - ctx = cl.Context(device) - prg = cl.Program(ctx, source=test_source) - @test prg != nothing - end - end - @testset "OpenCL.Program info" begin - for device in cl.devices() - ctx = cl.Context(device) - prg = cl.Program(ctx, source=test_source) - - @test prg[:context] == ctx - - @test typeof(prg[:devices]) == Vector{cl.Device} - @test length(prg[:devices]) > 0 - @test device in prg[:devices] - - @test typeof(prg[:source]) == String - @test prg[:source] == test_source - - @test prg[:reference_count] > 0 - @test 
isempty(strip(prg[:build_log][device])) - end - end - - @testset "OpenCL.Program build" begin - for device in cl.devices() - @testset "Device $(device)" begin - ctx = cl.Context(device) - prg = cl.Program(ctx, source=test_source) - @test cl.build!(prg) != nothing - - # BUILD_SUCCESS undefined in POCL implementation.. - if device[:platform][:name] == "Portable Computing Language" - @warn("Skipping OpenCL.Program build for Portable Computing Language Platform") - continue - end - @test prg[:build_status][device] == cl.CL_BUILD_SUCCESS - - # test build by methods chaining - @test prg[:build_status][device] == cl.CL_BUILD_SUCCESS - if device[:platform][:name] != "Intel(R) OpenCL" - # The intel CPU driver is very verbose on Linux and output - # compilation status even without any warnings - @test isempty(strip(prg[:build_log][device])) - end - end - end - end - - @testset "OpenCL.Program source code" begin - for device in cl.devices() - ctx = cl.Context(device) - prg = cl.Program(ctx, source=test_source) - @test prg[:source] == test_source - end - end - - @testset "OpenCL.Program binaries" begin - for device in cl.devices() - ctx = cl.Context(device) - prg = cl.Program(ctx, source=test_source) |> cl.build! - - @test device in collect(keys(prg[:binaries])) - binaries = prg[:binaries] - @test device in collect(keys(binaries)) - @test binaries[device] != nothing - @test length(binaries[device]) > 0 - prg2 = cl.Program(ctx, binaries=binaries) - @test prg2[:binaries] == binaries - try - prg2[:source] - error("should not happen") - catch err - @test isa(err, cl.CLError) - @test err.code == -45 - @test err.desc == :CL_INVALID_PROGRAM_EXECUTABLE - end - end - end -end