Skip to content

Commit

Permalink
Merge pull request #158 from frasercrmck/create-loop-no-magic
Browse files Browse the repository at this point in the history
[compiler] Ensure createLoop always creates a loop
  • Loading branch information
frasercrmck authored Jan 11, 2024
2 parents 3b1652a + 927452e commit 965e267
Show file tree
Hide file tree
Showing 6 changed files with 33 additions and 91 deletions.
25 changes: 13 additions & 12 deletions modules/compiler/test/lit/passes/subgroup-loop-unroll.ll
Original file line number Diff line number Diff line change
Expand Up @@ -65,17 +65,18 @@ attributes #4 = { alwaysinline norecurse nounwind }
!13 = !{i32 32, i32 0, i32 0, i32 0}
!20 = !{!13, ptr @sub_group_all_builtin}

;CHECK-LABEL: sw.bb2:
;CHECK: %[[BARRIER0:.+]] = getelementptr inbounds %__vecz_v32_sub_group_all_builtin_live_mem_info, ptr %live_variables, i64 0
;CHECK: %[[ITEM0:.+]] = getelementptr inbounds %__vecz_v32_sub_group_all_builtin_live_mem_info, ptr %[[BARRIER0]], i32 0, i32 0
;CHECK: %[[LD0:.+]] = load i1, ptr %[[ITEM0]], align 1
;CHECK: %[[ACCUM0:.+]] = and i1 true, %[[LD0]]
;CHECK: %[[BARRIER1:.+]] = getelementptr inbounds %__vecz_v32_sub_group_all_builtin_live_mem_info, ptr %live_variables, i64 1
;CHECK: %[[ITEM1:.+]] = getelementptr inbounds %__vecz_v32_sub_group_all_builtin_live_mem_info, ptr %[[BARRIER1]], i32 0, i32 0
;CHECK: %[[LD1:.+]] = load i1, ptr %[[ITEM1]], align 1
;CHECK: %[[ACCUM1:.+]] = and i1 %[[ACCUM0]], %[[LD1]]
;CHECK: br label %loopIR5
; CHECK-LABEL: sw.bb2:
; CHECK: br label %loopIR13

;CHECK-LABEL: loopIR5:
;CHECK: %18 = phi i1 [ %[[ACCUM1]], %sw.bb2 ], [ %{{.+}}, %loopIR5 ]
; CHECK-LABEL: loopIR13:
; CHECK: %[[PHI:.+]] = phi i64 [ 0, %sw.bb2 ], [ %[[INC:.+]], %loopIR13 ]
; CHECK: %[[PHI_ACCUM:.+]] = phi i1 [ true, %sw.bb2 ], [ %[[ACCUM:.+]], %loopIR13 ]
; CHECK: %[[BARRIER:.+]] = getelementptr inbounds %__vecz_v32_sub_group_all_builtin_live_mem_info, ptr %live_variables, i64 %[[PHI]]
; CHECK: %[[ITEM:.+]] = getelementptr inbounds %__vecz_v32_sub_group_all_builtin_live_mem_info, ptr %[[BARRIER]], i32 0, i32 0
; CHECK: %[[LD:.+]] = load i1, ptr %[[ITEM]], align 1
; CHECK: %[[ACCUM]] = and i1 %[[PHI_ACCUM]], %[[LD]]
; CHECK: %[[CMP:.+]] = icmp ult i64 %[[INC]], 2
; CHECK: br i1 %[[CMP]], label %loopIR13, label %exitIR14

; CHECK-LABEL: exitIR14:
; CHECK: %WGC_reduce = phi i1 [ %[[ACCUM]], %loopIR13 ]
10 changes: 3 additions & 7 deletions modules/compiler/utils/include/compiler/utils/pass_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,6 @@ struct CreateLoopOpts {
/// then it is created as the constant 1, based on type of `indexStart`,
/// which is a parameter to compiler::utils::createLoop proper.
llvm::Value *indexInc = nullptr;
/// @brief attemptUnroll Attempt to unroll the loop if possible.
bool attemptUnroll = false;
/// @brief disableVectorize Sets loop metadata disabling further
/// vectorization.
bool disableVectorize = false;
Expand All @@ -217,11 +215,9 @@ struct CreateLoopOpts {
/// user-specified amount. The loop thus has a trip count equal to the
/// following C-style loop: `for (auto i = start; i < end; i += incr)`.
///
/// Note that this helper does not always create a CFG loop. Users should be
/// careful not to rely on CFG structure, for example, creating PHIs in the
/// body function when passing attemptUnroll. In this instance, when unrolling,
/// the body function may be called *multiple times* in a straight line. Simple
/// induction variables can instead be created with the IV parameters.
/// Note that this helper always creates a CFG loop, even if the loop bounds
/// are known not to produce a loop at compile time. Users can use stock LLVM
/// optimizations to eliminate/simplify the loop in such a case.
///
/// @param entry Loop pre-header block. This block will be rewired to jump into
/// the new loop.
Expand Down
57 changes: 0 additions & 57 deletions modules/compiler/utils/source/pass_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -502,63 +502,6 @@ llvm::BasicBlock *createLoop(llvm::BasicBlock *entry, llvm::BasicBlock *exit,
llvm::SmallVector<llvm::Value *, 4> currIVs(opts.IVs.begin(), opts.IVs.end());
llvm::SmallVector<llvm::Value *, 4> nextIVs(opts.IVs.size());

// Check if indexStart, indexEnd, and indexInc are constants.
if (llvm::isa<llvm::ConstantInt>(indexStart) &&
llvm::isa<llvm::ConstantInt>(indexEnd) &&
llvm::isa<llvm::ConstantInt>(indexInc)) {
auto start = llvm::cast<llvm::ConstantInt>(indexStart)->getZExtValue();
auto end = llvm::cast<llvm::ConstantInt>(indexEnd)->getZExtValue();
auto inc = llvm::cast<llvm::ConstantInt>(indexInc)->getZExtValue();

// If the loop requires no iteration at all, just return an empty block.
if ((end - start) == 0) {
// If no iteration is required, and we have already defined a block to
// branch to, create a link between the entry block to that exit block and
// return the latter.
if (exit) {
llvm::BranchInst::Create(exit, entry);
return exit;
}
return entry;
} else if ((end - start) == inc) {
// If our loop would only actually contain one iteration, don't output the
// loop body!
// run the lamdba for the loop body, storing the block is finished at.
auto *b = body(entry, indexStart, currIVs, nextIVs);
if (exit) {
llvm::BranchInst::Create(exit, b);
return exit;
}
return b;
} else if (opts.attemptUnroll) {
// We've been asked to attempt to unroll the loop! We only unroll loops in
// certain situations though. We only unroll loops that have
// maxUnrollIterations or less iterations, as a higher number will
// significantly increase code bloat.
const unsigned maxUnrollIterations = 2;
if (((end - start) / inc) <= maxUnrollIterations) {
// We start at the entry block for our insertions.
llvm::BasicBlock *last = entry;

for (auto i = start; i < end; i += inc) {
// Update last to the exit block from the body.
last = body(last, llvm::ConstantInt::get(indexStart->getType(), i),
currIVs, nextIVs);
// Pass the 'next' values on to the next iteration, reusing the
// storage.
std::swap(currIVs, nextIVs);
}

// Return the last basic block we wrote to (our exit block).
if (exit) {
llvm::BranchInst::Create(exit, last);
return exit;
}
return last;
}
}
}

// the basic block that will link into our loop
llvm::IRBuilder<> entryIR(entry);

Expand Down
11 changes: 1 addition & 10 deletions modules/compiler/utils/source/work_item_loops_pass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,6 @@ struct ScheduleGenerator {

compiler::utils::CreateLoopOpts inner_opts;
inner_opts.IVs = {accumulator};
inner_opts.attemptUnroll = true;
inner_opts.disableVectorize = true;

BasicBlock *preheader = block;
Expand Down Expand Up @@ -443,11 +442,7 @@ struct ScheduleGenerator {
});

if (!resultPhi) {
if (exitBlock == latchBlock) {
// If `createLoop()` unrolled the loop, there will not be a separate
// exit block, so we won't create the LCSSA PHI node.
return {exitBlock, accumulator};
}
assert(exitBlock != latchBlock && "createLoop didn't create a loop");
resultPhi = PHINode::Create(accTy, 1, "WGC_reduce", exitBlock);
}
resultPhi->addIncoming(accumulator, latchBlock);
Expand Down Expand Up @@ -742,7 +737,6 @@ struct ScheduleGenerator {
compiler::utils::CreateLoopOpts inner_opts;
inner_opts.indexInc = VF;
inner_opts.IVs = ivs1.vec();
inner_opts.attemptUnroll = true;

BasicBlock *exit0 = compiler::utils::createLoop(
block, nullptr, zero, mainLoopLimit, inner_opts,
Expand Down Expand Up @@ -853,7 +847,6 @@ struct ScheduleGenerator {

compiler::utils::CreateLoopOpts inner_opts;
inner_opts.IVs = ivs1.vec();
inner_opts.attemptUnroll = true;
inner_opts.disableVectorize = true;

BasicBlock *exit0 = compiler::utils::createLoop(
Expand Down Expand Up @@ -1030,7 +1023,6 @@ struct ScheduleGenerator {

compiler::utils::CreateLoopOpts inner_vf_opts;
inner_vf_opts.indexInc = VF;
inner_vf_opts.attemptUnroll = true;
inner_vf_opts.IVs = ivs1.vec();
inner_vf_opts.loopIVNames.push_back("sg.x.main");
if (isScan) {
Expand Down Expand Up @@ -1182,7 +1174,6 @@ struct ScheduleGenerator {
tailLoopBB = tailPreheaderBB;
} else {
compiler::utils::CreateLoopOpts inner_scalar_opts;
inner_scalar_opts.attemptUnroll = true;
inner_scalar_opts.disableVectorize = true;
inner_scalar_opts.IVs.assign(subgroupIVs0.begin(),
subgroupIVs0.end());
Expand Down
9 changes: 7 additions & 2 deletions modules/compiler/vecz/test/lit/llvm/masked_atomics_scalar.ll
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ declare i32 @__vecz_b_v1_masked_atomicrmw_add_align4_acquire_1_u3ptrjb(ptr %p, i

; CHECK: define i32 @__vecz_b_v1_masked_atomicrmw_add_align4_acquire_1_u3ptrjb(ptr %p, i32 %val, i1 %mask) {
; CHECK: entry:
; CHECK: br label %loopIR

; CHECK: loopIR:
; CHECK: [[RET_PREV:%.*]] = phi i32 [ poison, %entry ], [ [[RET:%.*]], %if.else ]
; CHECK: [[MASKCMP:%.*]] = icmp ne i1 %mask, false
; CHECK: br i1 [[MASKCMP]], label %if.then, label %if.else

Expand All @@ -36,8 +40,9 @@ declare i32 @__vecz_b_v1_masked_atomicrmw_add_align4_acquire_1_u3ptrjb(ptr %p, i
; CHECK: br label %if.else

; CHECK: if.else:
; CHECK: [[RET:%.*]] = phi i32 [ poison, %entry ], [ [[ATOM]], %if.then ]
; CHECK: br label %exit
; CHECK: [[RET]] = phi i32 [ [[RET_PREV]], %loopIR ], [ [[ATOM]], %if.then ]
; CHECK: [[CMP:%.*]] = icmp ult i32 %{{.*}}, 1
; CHECK: br i1 [[CMP]], label %loopIR, label %exit

; CHECK: exit:
; CHECK: ret i32 [[RET]]
12 changes: 9 additions & 3 deletions modules/compiler/vecz/test/lit/llvm/masked_cmpxchg_scalar.ll
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ declare { i32, i1 } @__vecz_b_v1_masked_cmpxchg_align4_acquire_monotonic_1_u3ptr

; CHECK: define { i32, i1 } @__vecz_b_v1_masked_cmpxchg_align4_acquire_monotonic_1_u3ptrjjb(ptr %p, i32 %cmp, i32 %newval, i1 %mask) {
; CHECK: entry:
; CHECK: br label %loopIR

; CHECK: loopIR:
; CHECK: [[RETVAL_PREV:%.*]] = phi i32 [ poison, %entry ], [ [[RETVAL:%.*]], %if.else ]
; CHECK: [[RETSUCC_PREV:%.*]] = phi i1 [ poison, %entry ], [ [[RETSUCC:%.*]], %if.else ]
; CHECK: [[MASKCMP:%.*]] = icmp ne i1 %mask, false
; CHECK: br i1 [[MASKCMP]], label %if.then, label %if.else

Expand All @@ -38,9 +43,10 @@ declare { i32, i1 } @__vecz_b_v1_masked_cmpxchg_align4_acquire_monotonic_1_u3ptr
; CHECK: br label %if.else

; CHECK: if.else:
; CHECK: [[RETVAL:%.*]] = phi i32 [ poison, %entry ], [ [[EXT0]], %if.then ]
; CHECK: [[RETSUCC:%.*]] = phi i1 [ poison, %entry ], [ [[EXT1]], %if.then ]
; CHECK: br label %exit
; CHECK: [[RETVAL]] = phi i32 [ [[RETVAL_PREV]], %loopIR ], [ [[EXT0]], %if.then ]
; CHECK: [[RETSUCC]] = phi i1 [ [[RETSUCC_PREV]], %loopIR ], [ [[EXT1]], %if.then ]
; CHECK: [[CMP:%.*]] = icmp ult i32 %{{.*}}, 1
; CHECK: br i1 [[CMP]], label %loopIR, label %exit

; CHECK: exit:
; CHECK: [[INS0:%.*]] = insertvalue { i32, i1 } poison, i32 [[RETVAL]], 0
Expand Down

0 comments on commit 965e267

Please sign in to comment.