Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Add support to generic address space #994

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions include/clspv/FeatureMacro.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ enum class FeatureMacro {
__opencl_c_subgroups,
// following items are not supported
__opencl_c_device_enqueue,
__opencl_c_generic_address_space,
__opencl_c_pipes,
__opencl_c_program_scope_global_variables,
// following items are always enabled, but no point in complaining if they are
Expand All @@ -44,7 +43,8 @@ enum class FeatureMacro {
__opencl_c_read_write_images,
__opencl_c_atomic_scope_device,
__opencl_c_atomic_scope_all_devices,
__opencl_c_work_group_collective_functions
__opencl_c_work_group_collective_functions,
__opencl_c_generic_address_space,
};

#define FeatureStr(f) std::make_pair(FeatureMacro::f, #f)
Expand All @@ -53,6 +53,8 @@ constexpr std::array<std::pair<FeatureMacro, const char *>, 15>
FeatureStr(__opencl_c_3d_image_writes),
FeatureStr(__opencl_c_atomic_order_acq_rel),
FeatureStr(__opencl_c_fp64), FeatureStr(__opencl_c_images),
FeatureStr(__opencl_c_generic_address_space),
FeatureStr(__opencl_c_program_scope_global_variables),
FeatureStr(__opencl_c_subgroups),
// following items are always enabled by clang
FeatureStr(__opencl_c_int64),
Expand All @@ -62,10 +64,7 @@ constexpr std::array<std::pair<FeatureMacro, const char *>, 15>
FeatureStr(__opencl_c_atomic_scope_all_devices),
FeatureStr(__opencl_c_work_group_collective_functions),
// following items cannot be enabled so are automatically disabled
FeatureStr(__opencl_c_device_enqueue),
FeatureStr(__opencl_c_generic_address_space),
FeatureStr(__opencl_c_pipes),
FeatureStr(__opencl_c_program_scope_global_variables)};
FeatureStr(__opencl_c_device_enqueue), FeatureStr(__opencl_c_pipes)};
#undef FeatureStr

FeatureMacro FeatureMacroLookup(const std::string &name);
Expand Down
3 changes: 2 additions & 1 deletion include/clspv/Option.h
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,8 @@ SourceLanguage Language();
// Returns true when the source language makes use of the generic address space.
inline bool LanguageUsesGenericAddressSpace() {
return (Language() == SourceLanguage::OpenCL_CPP) ||
((Language() == SourceLanguage::OpenCL_C_20));
(Language() == SourceLanguage::OpenCL_C_20) ||
(Language() == SourceLanguage::OpenCL_C_30);
}

// Return the SPIR-V binary version
Expand Down
3 changes: 3 additions & 0 deletions lib/BuiltinsEnum.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,9 @@ enum BuiltinType : unsigned int {

kType_MemoryFence_Start,
kGetFence,
kToGlobal,
kToLocal,
kToPrivate,
kMemFence,
kReadMemFence,
kWriteMemFence,
Expand Down
3 changes: 3 additions & 0 deletions lib/BuiltinsMap.inc
Original file line number Diff line number Diff line change
Expand Up @@ -828,6 +828,9 @@ static std::unordered_map<const char *, Builtins::BuiltinType, cstr_hash,
{"set_user_event_status", Builtins::kSetUserEventStatus},

// MemoryFence
{"__to_global", Builtins::kToGlobal},
{"__to_local", Builtins::kToLocal},
{"__to_private", Builtins::kToPrivate},
{"get_fence", Builtins::kGetFence},
{"mem_fence", Builtins::kMemFence},
{"read_mem_fence", Builtins::kReadMemFence},
Expand Down
1 change: 1 addition & 0 deletions lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ add_library(clspv_passes OBJECT
${CMAKE_CURRENT_SOURCE_DIR}/SPIRVOp.cpp
${CMAKE_CURRENT_SOURCE_DIR}/SPIRVProducerPass.cpp
${CMAKE_CURRENT_SOURCE_DIR}/RemoveUnusedArguments.cpp
${CMAKE_CURRENT_SOURCE_DIR}/TransformGenericVolatileMemoryAccess.cpp
${CMAKE_CURRENT_SOURCE_DIR}/ReorderBasicBlocksPass.cpp
${CMAKE_CURRENT_SOURCE_DIR}/ReplaceLLVMIntrinsicsPass.cpp
${CMAKE_CURRENT_SOURCE_DIR}/ReplaceOpenCLBuiltinPass.cpp
Expand Down
21 changes: 19 additions & 2 deletions lib/ClusterConstants.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,20 @@ clspv::ClusterModuleScopeConstantVars::run(Module &M, ModuleAnalysisManager &) {
initializers_alignment[GV.getInitializer()] = GV.getAlignment();
}
}
else if (GV.getType()->getPointerAddressSpace() == clspv::AddressSpace::Global) {
if (GV.use_empty()) {
dead_global_constants.push_back(&GV);
} else {
global_constants.push_back(&GV);
if (GV.hasInitializer()) {
initializers.insert(GV.getInitializer());
initializers_alignment[GV.getInitializer()] = GV.getAlignment();
} else {
initializers.insert(Constant::getNullValue(GV.getType()));
initializers_alignment[Constant::getNullValue(GV.getType())] = GV.getAlignment();
}
}
}
}

for (GlobalVariable *GV : dead_global_constants) {
Expand Down Expand Up @@ -127,10 +141,10 @@ clspv::ClusterModuleScopeConstantVars::run(Module &M, ModuleAnalysisManager &) {
Constant *clustered_initializer =
ConstantStruct::get(type, initializers_as_vec);
GlobalVariable *clustered_gv = new GlobalVariable(
M, type, true, GlobalValue::InternalLinkage, clustered_initializer,
M, type, false, GlobalValue::InternalLinkage, clustered_initializer,
clspv::ClusteredConstantsVariableName(), nullptr,
GlobalValue::ThreadLocalMode::NotThreadLocal,
clspv::AddressSpace::Constant);
clspv::AddressSpace::Global);
assert(clustered_gv);
clustered_gv->setAlignment(MaybeAlign(max_alignment));

Expand All @@ -148,7 +162,10 @@ clspv::ClusterModuleScopeConstantVars::run(Module &M, ModuleAnalysisManager &) {
Instruction *gep = GetElementPtrInst::CreateInBounds(
clustered_gv->getValueType(), clustered_gv,
{zero, Builder.getInt32(index)}, "", inst);
// TODO: Handle module constants whose user is a GEP that is itself used by a load. This case happens when a module constant and a global variable are in the same OpenCL module.
user->replaceUsesOfWith(GV, gep);
} else if (dyn_cast<ConstantExpr>(user)) {
// Will be handled with instructions.
} else {
errs() << "Don't know how to handle updating user of __constant: "
<< *user << "\n";
Expand Down
15 changes: 15 additions & 0 deletions lib/Compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,21 @@ int RunPassPipeline(llvm::Module &M, llvm::raw_svector_ostream *binaryStream) {
pm.addPass(clspv::FixupBuiltinsPass());
pm.addPass(clspv::ThreeElementVectorLoweringPass());

// Lower longer vectors when requested. Note that this pass depends on
// ReplaceOpenCLBuiltinPass and expects DeadCodeEliminationPass to be run
// afterwards.
if (clspv::Option::LongVectorSupport()) {
pm.addPass(clspv::LongVectorLoweringPass());
}
// Volatile information on loads and stores is not used inside the
// SPIRVProducer pass, so it has no effect on the generated code, but it
// stops the mem2reg pass from optimizing them. Clspv tries to get rid of
// generic address spaces by inferring them and optimizing them away through
// the "InferAddressSpacePass" and "mem2reg" passes. However, volatile loads
// and stores stop mem2reg. So, we remove the volatile flag from loads and
// stores so that mem2reg can optimize them and remove the generic address
// spaces.
pm.addPass(clspv::TransformGenericVolatileMemoryAccess());
// We need to run mem2reg and inst combine early because our
// createInlineFuncWithPointerBitCastArgPass pass cannot handle the
// pattern
Expand Down
4 changes: 1 addition & 3 deletions lib/FeatureMacro.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,7 @@ namespace clspv {
FeatureMacro FeatureMacroLookup(const std::string &name) {
constexpr std::array<FeatureMacro, 4> NotSuppported{
FeatureMacro::__opencl_c_pipes,
FeatureMacro::__opencl_c_generic_address_space,
FeatureMacro::__opencl_c_device_enqueue,
FeatureMacro::__opencl_c_program_scope_global_variables};
FeatureMacro::__opencl_c_device_enqueue};

const auto macro_itr = std::find_if(
FeatureMacroList.begin(), FeatureMacroList.end(),
Expand Down
6 changes: 5 additions & 1 deletion lib/LongVectorLoweringPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -949,7 +949,11 @@ Value *clspv::LongVectorLoweringPass::visitCastInst(CastInst &I) {
V = B.CreateIntToPtr(EquivalentValue, EquivalentDestTy, I.getName());
break;
}

case Instruction::AddrSpaceCast: {
IRBuilder<> B(&I);
V = B.CreateAddrSpaceCast(EquivalentValue, EquivalentDestTy, I.getName());
break;
}
default:
llvm_unreachable("Cast unsupported.");
break;
Expand Down
2 changes: 1 addition & 1 deletion lib/NormalizeGlobalVariable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ void NormalizeGlobalVariables(Module &M) {
SmallVector<GlobalVariable *, 8> globals;
for (auto &GV : M.globals()) {
if (GV.hasInitializer() && GV.getType()->getPointerAddressSpace() ==
clspv::AddressSpace::Constant) {
clspv::AddressSpace::Global) {
globals.push_back(&GV);
}
}
Expand Down
1 change: 1 addition & 0 deletions lib/PassRegistry.def
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ MODULE_PASS("native-math", clspv::NativeMathPass)
MODULE_PASS("opencl-inliner", clspv::OpenCLInlinerPass)
MODULE_PASS("physical-pointer-args", clspv::PhysicalPointerArgsPass)
MODULE_PASS("remove-unused-arguments", clspv::RemoveUnusedArguments)
MODULE_PASS("transform-generic-volatile-memory-access", clspv::TransformGenericVolatileMemoryAccess)
MODULE_PASS("replace-llvm-intrinsics", clspv::ReplaceLLVMIntrinsicsPass)
MODULE_PASS("replace-opencl-builtin", clspv::ReplaceOpenCLBuiltinPass)
MODULE_PASS("replace-pointer-bitcast", clspv::ReplacePointerBitcastPass)
Expand Down
1 change: 1 addition & 0 deletions lib/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
#include "SplatSelectCondition.h"
#include "StripFreezePass.h"
#include "ThreeElementVectorLoweringPass.h"
#include "TransformGenericVolatileMemoryAccess.h"
#include "UBOTypeTransformPass.h"
#include "UndoBoolPass.h"
#include "UndoByvalPass.h"
Expand Down
20 changes: 20 additions & 0 deletions lib/ReplaceOpenCLBuiltinPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ std::set<Builtins::BuiltinType> ReplaceOpenCLBuiltinPass::ReplaceableBuiltins =
Builtins::kSubGroupBarrier,
Builtins::kAtomicWorkItemFence,
Builtins::kGetFence,
Builtins::kToGlobal,
Builtins::kToLocal,
Builtins::kToPrivate,
Builtins::kMemFence,
Builtins::kReadMemFence,
Builtins::kWriteMemFence,
Expand Down Expand Up @@ -528,6 +531,12 @@ bool ReplaceOpenCLBuiltinPass::runOnFunction(Function &F) {
return replaceAtomicLoad(F);
case Builtins::kGetFence:
return replaceGetFence(F);
case Builtins::kToGlobal:
return replaceAddressSpaceQualifiers(F, AddressSpace::Global);
case Builtins::kToLocal:
return replaceAddressSpaceQualifiers(F, AddressSpace::Local);
case Builtins::kToPrivate:
return replaceAddressSpaceQualifiers(F, AddressSpace::Private);
case Builtins::kAtomicInit:
case Builtins::kAtomicStore:
case Builtins::kAtomicStoreExplicit:
Expand Down Expand Up @@ -3783,6 +3792,17 @@ bool ReplaceOpenCLBuiltinPass::replaceGetFence(Function &F) {
});
}

// Replaces every call to the address-space qualifier builtin |F| with an
// addrspacecast of the call's first argument into |ToAddressSpace|. The
// pointee type of the argument is kept; only the address space changes.
//
// NOTE(review): the argument is used exactly as passed to the call; any cast
// to the generic address space emitted by the front end is not stripped here.
bool ReplaceOpenCLBuiltinPass::replaceAddressSpaceQualifiers(
    Function &F, unsigned ToAddressSpace) {
  return replaceCallsWithValue(F, [ToAddressSpace](CallInst *Call) {
    auto *Source = Call->getArgOperand(0);
    auto *Pointee = Source->getType()->getNonOpaquePointerElementType();
    auto *TargetTy = PointerType::get(Pointee, ToAddressSpace);

    // Insert the cast right before the call that is being replaced.
    IRBuilder<> Builder(Call);
    return Builder.CreateAddrSpaceCast(Source, TargetTy);
  });
}

bool ReplaceOpenCLBuiltinPass::replaceExplicitAtomics(
Function &F, spv::Op Op, spv::MemorySemanticsMask semantics) {
return replaceCallsWithValue(F, [Op, semantics](CallInst *Call) {
Expand Down
1 change: 1 addition & 0 deletions lib/ReplaceOpenCLBuiltinPass.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ struct ReplaceOpenCLBuiltinPass
bool replaceAtomics(llvm::Function &F, llvm::AtomicRMWInst::BinOp Op);
bool replaceAtomicLoad(llvm::Function &F);
bool replaceGetFence(llvm::Function &F);
bool replaceAddressSpaceQualifiers(llvm::Function &F, unsigned ToAddressSpace);
bool replaceExplicitAtomics(llvm::Function &F, spv::Op Op,
spv::MemorySemanticsMask semantics =
spv::MemorySemanticsAcquireReleaseMask);
Expand Down
8 changes: 4 additions & 4 deletions lib/SPIRVProducerPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -991,7 +991,7 @@ void SPIRVProducerPassImpl::FindGlobalConstVars() {
SmallVector<GlobalVariable *, 8> GVList;
SmallVector<GlobalVariable *, 8> DeadGVList;
for (GlobalVariable &GV : module->globals()) {
if (GV.getType()->getAddressSpace() == AddressSpace::Constant) {
if (GV.getType()->getAddressSpace() == AddressSpace::Global) {
if (GV.use_empty()) {
DeadGVList.push_back(&GV);
} else {
Expand Down Expand Up @@ -1260,7 +1260,7 @@ void SPIRVProducerPassImpl::FindTypesForResourceVars() {
PointerType *PTy = cast<PointerType>(GV.getType());
const auto AS = PTy->getAddressSpace();
const bool module_scope_constant_external_init =
(AS == AddressSpace::Constant) && GV.hasInitializer();
(AS == AddressSpace::Global || AS == AddressSpace::Constant) && GV.hasInitializer();
const spv::BuiltIn BuiltinType = GetBuiltin(GV.getName());
if (module_scope_constant_external_init &&
spv::BuiltInMax == BuiltinType) {
Expand Down Expand Up @@ -2312,7 +2312,7 @@ SPIRVID SPIRVProducerPassImpl::getSPIRVConstant(Constant *C) {
llvm_unreachable("Unhandled function declaration/definition");
} else if (auto *ConstExpr = dyn_cast<ConstantExpr>(Cst)) {
// If there is exactly one use we know where to insert the instruction
if (ConstExpr->getNumUses() == 1) {
if (ConstExpr->getNumUses() <= 2) {
auto *User = *ConstExpr->user_begin();
auto *EquivInstr =
ConstExpr->getAsInstruction(dyn_cast<Instruction>(User));
Expand Down Expand Up @@ -2803,7 +2803,7 @@ void SPIRVProducerPassImpl::GenerateGlobalVar(GlobalVariable &GV) {
const auto spvSC = GetStorageClass(AS);

const bool module_scope_constant_external_init =
(AS == AddressSpace::Constant) && GV.hasInitializer() &&
(AS == AddressSpace::Global || AS == AddressSpace::Constant) && GV.hasInitializer() &&
clspv::Option::ModuleConstantsInStorageBuffer();

if (GV.hasInitializer()) {
Expand Down
73 changes: 73 additions & 0 deletions lib/TransformGenericVolatileMemoryAccess.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
// Copyright 2019 The Clspv Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"

#include "clspv/AddressSpace.h"

#include "TransformGenericVolatileMemoryAccess.h"

using namespace llvm;

// Strips the `volatile` flag from every load and store in |M| whose pointer
// operand resolves, via getPointerAddressSpace(), to the generic address
// space.
//
// The flag is cleared in place instead of emitting a replacement instruction.
// Re-creating the access with IRBuilder::CreateLoad/CreateStore would fall
// back to the default alignment and drop the original instruction's atomic
// ordering, metadata and name; mutating the existing instruction keeps all of
// them and avoids creating and erasing instructions while walking the module.
//
// Returns PreservedAnalyses::all() when no instruction was modified and
// PreservedAnalyses::none() otherwise.
PreservedAnalyses
clspv::TransformGenericVolatileMemoryAccess::run(Module &M,
                                                 ModuleAnalysisManager &) {
  bool Changed = false;

  for (auto &F : M.functions()) {
    for (auto &BB : F) {
      for (auto &I : BB) {
        if (auto *load = dyn_cast<LoadInst>(&I)) {
          if (load->isVolatile() &&
              getPointerAddressSpace(load->getPointerOperandType()) ==
                  clspv::AddressSpace::Generic) {
            load->setVolatile(false);
            Changed = true;
          }
        } else if (auto *store = dyn_cast<StoreInst>(&I)) {
          if (store->isVolatile() &&
              getPointerAddressSpace(store->getPointerOperandType()) ==
                  clspv::AddressSpace::Generic) {
            store->setVolatile(false);
            Changed = true;
          }
        }
      }
    }
  }

  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}

// Returns the address space of the innermost pointer level of |PtrTy|: while
// the pointee is itself a pointer type, descend into it, then report the
// address space of the last pointer type reached.
//
// Relies on getNonOpaquePointerElementType(), so |PtrTy| must be a typed
// (non-opaque) pointer type.
unsigned clspv::TransformGenericVolatileMemoryAccess::getPointerAddressSpace(
    Type *PtrTy) const {
  auto *Innermost = PtrTy;
  while (Innermost->getNonOpaquePointerElementType()->isPointerTy()) {
    Innermost = Innermost->getNonOpaquePointerElementType();
  }
  return Innermost->getPointerAddressSpace();
}
30 changes: 30 additions & 0 deletions lib/TransformGenericVolatileMemoryAccess.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Copyright 2022 The Clspv Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"

#ifndef _CLSPV_LIB_TRANSFORM_GENERIC_VOLATILE_MEMORY_ACCESS_PASS_H
#define _CLSPV_LIB_TRANSFORM_GENERIC_VOLATILE_MEMORY_ACCESS_PASS_H

namespace clspv {
// Module pass that rewrites volatile loads and stores whose pointer operand
// resolves to the generic address space into non-volatile accesses.
struct TransformGenericVolatileMemoryAccess
: llvm::PassInfoMixin<TransformGenericVolatileMemoryAccess> {
// Pass entry point; visits every load and store in |M|.
llvm::PreservedAnalyses run(llvm::Module &M, llvm::ModuleAnalysisManager &);

// Returns the address space of the innermost pointer level of |ptr|
// (pointer-to-pointer types are followed down to the last pointer type).
unsigned getPointerAddressSpace(llvm::Type *ptr) const;
};
} // namespace clspv

#endif // _CLSPV_LIB_TRANSFORM_GENERIC_VOLATILE_MEMORY_ACCESS_PASS_H
Loading