From 918bbe69d84802508f8739ceb2b5b522e4a74d75 Mon Sep 17 00:00:00 2001 From: bmanga Date: Tue, 28 May 2024 17:00:16 +0100 Subject: [PATCH] Review fixes --- lib/Compiler.cpp | 7 ++++--- test/LLVMIntrinsics/descend_into_array.cl | 8 ++++---- test/LLVMIntrinsics/memcpy_from_constant.cl | 2 +- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/lib/Compiler.cpp b/lib/Compiler.cpp index ff4070ff7..12aeefb45 100644 --- a/lib/Compiler.cpp +++ b/lib/Compiler.cpp @@ -525,6 +525,10 @@ int RunPassPipeline(llvm::Module &M, llvm::raw_svector_ostream *binaryStream) { pm.addPass(clspv::AutoPodArgsPass()); pm.addPass(clspv::DeclarePushConstantsPass()); pm.addPass(clspv::DefineOpenCLWorkItemBuiltinsPass()); + // Replace the LLVM intrinsics. This will give them a chance to be better + // optimized through the pipeline. It also helps with generic address space + // lowering. + pm.addPass(clspv::ReplaceLLVMIntrinsicsPass()); // RewritePackedStructsPass will rewrite packed struct types, and // ReplacePointerBitcastPass will lower the new packed struct type. So, @@ -571,9 +575,6 @@ int RunPassPipeline(llvm::Module &M, llvm::raw_svector_ostream *binaryStream) { // This pass needs to be after every inlining to make sure we are capable of // removing every addrspacecast. It only needs to run if generic addrspace // is used. - - pm.addPass(clspv::ReplaceLLVMIntrinsicsPass()); - if (clspv::Option::LanguageUsesGenericAddressSpace()) { pm.addPass(clspv::ReplaceOpenCLBuiltinPass()); pm.addPass(clspv::LowerAddrSpaceCastPass()); diff --git a/test/LLVMIntrinsics/descend_into_array.cl b/test/LLVMIntrinsics/descend_into_array.cl index 412e79da0..ca22cf93d 100644 --- a/test/LLVMIntrinsics/descend_into_array.cl +++ b/test/LLVMIntrinsics/descend_into_array.cl @@ -18,7 +18,7 @@ __kernel void foo(__global outer* out, global outer* in) { // CHECK: [[zero:%[0-9a-zA-Z_]+]] = OpConstant {{.*}} 0 // CHECK: [[dst:%[0-9a-zA-Z_]+]] = OpVariable {{.*}} StorageBuffer // CHECK: [[src:%[0-9a-zA-Z_]+]] = OpVariable {{.*}} StorageBuffer -// CHECK-DAG: [[src_gep:%[0-9a-zA-Z_]+]] = OpAccessChain {{.*}} [[src]] [[zero]] [[zero]] -// CHECK-DAG: OpLoad {{.*}} [[src_gep]] -// CHECK-DAG: [[dst_gep:%[0-9a-zA-Z_]+]] = OpAccessChain {{.*}} [[dst]] [[zero]] [[zero]] -// CHECK-DAG: OpStore [[dst_gep]] +// CHECK: [[dst_gep:%[0-9a-zA-Z_]+]] = OpAccessChain {{.*}} [[dst]] [[zero]] [[zero]] +// CHECK: [[src_gep:%[0-9a-zA-Z_]+]] = OpAccessChain {{.*}} [[src]] [[zero]] [[zero]] +// CHECK: OpLoad {{.*}} [[src_gep]] +// CHECK: OpStore [[dst_gep]] diff --git a/test/LLVMIntrinsics/memcpy_from_constant.cl b/test/LLVMIntrinsics/memcpy_from_constant.cl index e9ecfac9c..a6d34d5f1 100644 --- a/test/LLVMIntrinsics/memcpy_from_constant.cl +++ b/test/LLVMIntrinsics/memcpy_from_constant.cl @@ -51,4 +51,4 @@ void kernel memcpy_from_constant(global float* result) { // CHECK: OpStore [[ssbo_gep3]] [[gep3]] // CHECK: [[ssbo_gep4:%[a-zA-Z0-9_]+]] = OpAccessChain [[ptr_ssbo_float]] [[ssbo]] [[uint_0]] [[uint_4]] -// CHECK: OpStore [[ssbo_gep4]] [[gep4]] \ No newline at end of file +// CHECK: OpStore [[ssbo_gep4]] [[gep4]]