Skip to content

Commit

Permalink
Merge pull request #53 from AmyCodeplay/amy/barrier-wrapper-value-usage
Browse files Browse the repository at this point in the history
Retrieve values from barrier struct for use in new memory barriers
  • Loading branch information
AmyCodeplay authored Jul 5, 2023
2 parents 4a7289e + bef5587 commit 44b9ea9
Show file tree
Hide file tree
Showing 4 changed files with 232 additions and 135 deletions.
48 changes: 48 additions & 0 deletions modules/compiler/test/lit/passes/barriers-controlled-mem-fence.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
; Copyright (C) Codeplay Software Limited
;
; Licensed under the Apache License, Version 2.0 (the "License") with LLVM
; Exceptions; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
; https://github.com/codeplaysoftware/oneapi-construction-kit/blob/main/LICENSE.txt
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
; WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
; License for the specific language governing permissions and limitations
; under the License.
;
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

; RUN: muxc --passes barriers-pass,verify -S %s | FileCheck %s

; This test checks that a non-constant value used by a work-group barrier
; (%secret, the second operand of @__mux_work_group_barrier) is reloaded from
; the barrier struct for use in the newly-created memory barrier call in the
; wrapper function, rather than referencing the original (now out-of-scope)
; value.

; CHECK-LABEL: sw.bb2:
; CHECK-NEXT: %[[GEP:.+]] = getelementptr inbounds %test_fence_live_mem_info, ptr %live_variables, i64 0
; CHECK-NEXT: %live_gep_secret = getelementptr %test_fence_live_mem_info, ptr %[[GEP]], i32 0, i32 0
; CHECK-NEXT: %secret_load = load i32, ptr %live_gep_secret, align 4
; CHECK-NEXT: call void @__mux_mem_barrier(i32 %secret_load, i32 912)

; ModuleID = 'SPIR-V'
source_filename = "SPIR-V"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown-elf"

; Kernel whose barrier takes a dynamically-computed memory-scope operand
; (%secret) which must therefore be kept live across the barrier.
; Function Attrs: convergent nounwind
define internal void @test_fence(ptr addrspace(3) nocapture %out) local_unnamed_addr #0 {
entry:
  call void @mysterious_side_effect(ptr addrspace(3) %out)
  %secret = call i32 @mysterious_secret_generator(i32 0)
  tail call void @__mux_work_group_barrier(i32 0, i32 %secret, i32 912)
  call void @mysterious_side_effect(ptr addrspace(3) %out)
  ret void
}

declare i32 @mysterious_secret_generator(i32)
declare void @mysterious_side_effect(ptr addrspace(3))
declare void @__mux_work_group_barrier(i32, i32, i32)

attributes #0 = { "mux-kernel"="entry-point" }
27 changes: 27 additions & 0 deletions modules/compiler/utils/include/compiler/utils/barrier_regions.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,33 @@ class Barrier {
llvm::Function &getFunc() { return func_; }
const llvm::Function &getFunc() const { return func_; }

  /// @brief Struct to help retrieval of values from the barrier struct.
  ///
  /// Caches the GEPs and reloads it creates, so that a given live value is
  /// materialized at most once when reuse is requested.
  struct LiveValuesHelper {
    /// @brief The barrier whose live-variable maps are consulted.
    Barrier &barrier;
    /// @brief Cache of GEPs into the barrier struct, keyed by live value.
    llvm::DenseMap<const llvm::Value *, llvm::Value *> live_GEPs;
    /// @brief Cache of values already reloaded from the barrier struct.
    llvm::DenseMap<const llvm::Value *, llvm::Value *> reloads;
    /// @brief Default insertion point for newly-created GEPs.
    llvm::Instruction *insert_point = nullptr;
    /// @brief The barrier struct holding the live variables.
    llvm::Value *barrier_struct = nullptr;
    /// @brief Cached vscale intrinsic call, created on first use (only
    /// needed for scalable-vector live values).
    llvm::Value *vscale = nullptr;

    /// @brief Constructor.
    ///
    /// @param[in] b the barrier to retrieve live values from
    /// @param[in] i the default insertion point for new instructions
    /// @param[in] s the barrier struct (may be null if there is none)
    LiveValuesHelper(Barrier &b, llvm::Instruction *i, llvm::Value *s)
        : barrier(b), insert_point(i), barrier_struct(s) {}

    /// @brief get a GEP instruction pointing to the given value in the barrier
    /// struct.
    ///
    /// @param[in] live the live value to locate in the barrier struct
    /// @return a pointer into the barrier struct, or nullptr if the value is
    /// not stored there.
    llvm::Value *getGEP(const llvm::Value *live);

    /// @brief get a value reloaded from the barrier struct.
    ///
    /// @param[in] live the live value to retrieve from the barrier
    /// @param[in] insert where to insert new instructions
    /// @param[in] name a postfix to append to new value names
    /// @param[in] reuse whether to generate the load for a given value only
    /// once, returning the previously cached value on further requests.
    llvm::Value *getReload(llvm::Value *live, llvm::Instruction *insert,
                           const char *name, bool reuse = false);
  };

private:
/// @brief The first is set for livein and the second is set for liveout
using live_in_out_t =
Expand Down
262 changes: 128 additions & 134 deletions modules/compiler/utils/source/barrier_regions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,121 @@ void UpdateAndTrimPHINodeEdges(BasicBlock *BB, ValueToValueMapTy &vmap) {

} // namespace

// Returns a pointer (GEP) into the barrier struct at which the given live
// value is stored, creating and caching the GEP on first request. Fixed-size
// values live at a known field index; scalable-vector values live in a raw
// byte buffer at a vscale-scaled byte offset. Returns nullptr if the value is
// not stored in the barrier struct at all.
Value *compiler::utils::Barrier::LiveValuesHelper::getGEP(const Value *live) {
  // Hand back a previously-created GEP for this value, if any.
  auto gep_it = live_GEPs.find(live);
  if (gep_it != live_GEPs.end()) {
    return gep_it->second;
  }

  Value *gep;
  // For allocas the barrier struct stores the allocated object itself, not
  // the pointer, so key the scalable/fixed decision on the allocated type.
  Type *data_ty = live->getType();
  if (auto *AI = dyn_cast<AllocaInst>(live)) {
    data_ty = AI->getAllocatedType();
  }

  if (!isa<ScalableVectorType>(data_ty)) {
    // Fixed-size values are ordinary fields of the barrier struct.
    auto field_it = barrier.live_variable_index_map_.find(live);
    if (field_it == barrier.live_variable_index_map_.end()) {
      return nullptr;
    }

    LLVMContext &context = barrier.module_.getContext();
    unsigned field_index = field_it->second;
    // Standard struct GEP: index 0 into the pointer, then the field index.
    Value *live_variable_info_idxs[2] = {
        ConstantInt::get(Type::getInt32Ty(context), 0),
        ConstantInt::get(Type::getInt32Ty(context), field_index)};

    gep = GetElementPtrInst::Create(
        barrier.live_var_mem_ty_, barrier_struct, live_variable_info_idxs,
        Twine("live_gep_") + live->getName(), insert_point);
  } else {
    // Scalable vectors cannot be ordinary struct fields; they are packed
    // into a trailing raw byte buffer at byte offsets scaled by vscale.
    auto field_it = barrier.live_variable_scalables_map_.find(live);
    if (field_it == barrier.live_variable_scalables_map_.end()) {
      return nullptr;
    }
    unsigned const field_offset = field_it->second;
    Value *scaled_offset = nullptr;

    LLVMContext &context = barrier.module_.getContext();
    IRBuilder<> B(insert_point);
    if (field_offset != 0) {
      // Materialize (and cache) a single vscale intrinsic call, then compute
      // the runtime byte offset: vscale * field_offset.
      if (!vscale) {
        Type *size_type = B.getIntNTy(barrier.size_t_bytes * 8);
        vscale = B.CreateIntrinsic(Intrinsic::vscale, size_type, {});
      }
      scaled_offset = B.CreateMul(
          vscale, B.getIntN(barrier.size_t_bytes * 8, field_offset));
    } else {
      // Offset zero needs no scaling.
      scaled_offset = ConstantInt::get(Type::getInt32Ty(context), 0);
    }

    Value *live_variable_info_idxs[3] = {
        ConstantInt::get(Type::getInt32Ty(context), 0),
        ConstantInt::get(Type::getInt32Ty(context),
                         barrier.live_var_mem_scalables_index),
        scaled_offset,
    };

    // Gep into the raw byte buffer
    gep = B.CreateInBoundsGEP(barrier.live_var_mem_ty_, barrier_struct,
                              live_variable_info_idxs,
                              Twine("live_gep_scalable_") + live->getName());

    // Cast the pointer to the scalable vector type
    gep = B.CreatePointerCast(
        gep,
        PointerType::get(
            data_ty,
            cast<PointerType>(barrier_struct->getType())->getAddressSpace()));
  }

  // Cache the GEP so repeated requests for this value reuse it.
  live_GEPs.insert(std::make_pair(live, gep));
  return gep;
}

// Returns the value of `live` usable at `insert`, either by loading it back
// from the barrier struct or - when it is not stored there - by cloning its
// defining instruction and recursively reloading that clone's operands.
// Non-instruction values (e.g. arguments, constants) are returned unchanged.
Value *compiler::utils::Barrier::LiveValuesHelper::getReload(
    Value *live, Instruction *insert, const char *name, bool reuse) {
  // Note: operator[] default-inserts nullptr on first sight of `live`.
  auto &mapped = reloads[live];
  if (reuse && mapped) {
    // Caller asked for reuse and we already materialized this value.
    return mapped;
  }

  if (Value *v = getGEP(live)) {
    if (!isa<AllocaInst>(live)) {
      // If live variable is not allocainst, insert load.
      v = new LoadInst(live->getType(), v, Twine(live->getName(), name),
                       insert);
    }
    mapped = v;
    return v;
  }

  if (auto *I = dyn_cast<Instruction>(live)) {
    // The value is not in the barrier struct: rematerialize it by cloning
    // the defining instruction and reloading its operands (below).
    if (!reuse || !mapped) {
      auto *clone = I->clone();
      clone->setName(I->getName());
      clone->insertBefore(insert);
      // If we cloned at the helper's default insertion point, move that
      // point onto the clone so GEPs created while reloading the clone's
      // operands are inserted before the clone that uses them.
      if (insert_point == insert) {
        insert_point = clone;
      }
      insert = clone;
      mapped = clone;
      I = clone;
    } else {
      return mapped;
    }

    // Recursively reload the clone's instruction operands, inserting the
    // reloads immediately before the clone. The iterator is advanced before
    // op.set() since the recursion may rewrite the operand in place.
    for (auto op_it = I->op_begin(); op_it != I->op_end();) {
      auto &op = *op_it++;
      if (auto *op_inst = dyn_cast<Instruction>(op.get())) {
        op.set(getReload(op_inst, insert, name, reuse));
      }
    }
    return I;
  }
  // Not in the barrier struct and not an instruction: use the value as-is.
  return live;
}

void compiler::utils::Barrier::Run(llvm::ModuleAnalysisManager &mam) {
bi_ = &mam.getResult<BuiltinInfoAnalysis>(module_);
FindBarriers();
Expand Down Expand Up @@ -317,14 +432,16 @@ void compiler::utils::Barrier::FindBarriers() {
// Check call instructions for barrier.
if (CallInst *call_inst = dyn_cast<CallInst>(&bi)) {
Function *callee = call_inst->getCalledFunction();
auto const B = bi_->analyzeBuiltin(*callee);
if (callee != nullptr && isMuxWGControlBarrierID(B.ID)) {
unsigned id = ~0u;
auto *const id_param = call_inst->getOperand(0);
if (auto *const id_param_c = dyn_cast<ConstantInt>(id_param)) {
id = id_param_c->getZExtValue();
if (callee != nullptr) {
auto const B = bi_->analyzeBuiltin(*callee);
if (isMuxWGControlBarrierID(B.ID)) {
unsigned id = ~0u;
auto *const id_param = call_inst->getOperand(0);
if (auto *const id_param_c = dyn_cast<ConstantInt>(id_param)) {
id = id_param_c->getZExtValue();
}
orderedBarriers.emplace_back(id, call_inst);
}
orderedBarriers.emplace_back(id, call_inst);
}
}
}
Expand Down Expand Up @@ -962,133 +1079,10 @@ Function *compiler::utils::Barrier::GenerateNewKernel(BarrierRegion &region) {
}

// It puts all the GEPs at the start of the kernel, but only once
struct live_values_helper {
Barrier &barrier;
DenseMap<const Value *, Value *> live_GEPs;
DenseMap<const Value *, Value *> reloads;
Instruction *insert_point = nullptr;
Value *barrier_struct = nullptr;
Value *vscale = nullptr;

live_values_helper(Barrier &b, Instruction *i, Value *s)
: barrier(b), insert_point(i), barrier_struct(s) {}

Value *getGEP(const Value *live) {
auto gep_it = live_GEPs.find(live);
if (gep_it != live_GEPs.end()) {
return gep_it->second;
}

Value *gep;
Type *data_ty = live->getType();
if (auto *AI = dyn_cast<AllocaInst>(live)) {
data_ty = AI->getAllocatedType();
}

if (!isa<ScalableVectorType>(data_ty)) {
auto field_it = barrier.live_variable_index_map_.find(live);
if (field_it == barrier.live_variable_index_map_.end()) {
return nullptr;
}

LLVMContext &context = barrier.module_.getContext();
unsigned field_index = field_it->second;
Value *live_variable_info_idxs[2] = {
ConstantInt::get(Type::getInt32Ty(context), 0),
ConstantInt::get(Type::getInt32Ty(context), field_index)};

gep = GetElementPtrInst::Create(
barrier.live_var_mem_ty_, barrier_struct, live_variable_info_idxs,
Twine("live_gep_") + live->getName(), insert_point);
} else {
auto field_it = barrier.live_variable_scalables_map_.find(live);
if (field_it == barrier.live_variable_scalables_map_.end()) {
return nullptr;
}
unsigned const field_offset = field_it->second;
Value *scaled_offset = nullptr;

LLVMContext &context = barrier.module_.getContext();
IRBuilder<> B(insert_point);
if (field_offset != 0) {
if (!vscale) {
Type *size_type = B.getIntNTy(barrier.size_t_bytes * 8);
vscale = B.CreateIntrinsic(Intrinsic::vscale, size_type, {});
}
scaled_offset = B.CreateMul(
vscale, B.getIntN(barrier.size_t_bytes * 8, field_offset));
} else {
scaled_offset = ConstantInt::get(Type::getInt32Ty(context), 0);
}

Value *live_variable_info_idxs[3] = {
ConstantInt::get(Type::getInt32Ty(context), 0),
ConstantInt::get(Type::getInt32Ty(context),
barrier.live_var_mem_scalables_index),
scaled_offset,
};

// Gep into the raw byte buffer
gep = B.CreateInBoundsGEP(
barrier.live_var_mem_ty_, barrier_struct, live_variable_info_idxs,
Twine("live_gep_scalable_") + live->getName());

// Cast the pointer to the scalable vector type
gep = B.CreatePointerCast(
gep, PointerType::get(data_ty,
cast<PointerType>(barrier_struct->getType())
->getAddressSpace()));
}

live_GEPs.insert(std::make_pair(live, gep));
return gep;
}

Value *getReload(Value *live, Instruction *insert, const char *name,
bool reuse = false) {
auto &mapped = reloads[live];
if (reuse && mapped) {
return mapped;
}

Value *v = getGEP(live);
if (v) {
if (!isa<AllocaInst>(live)) {
// If live variable is not allocainst, insert load.
v = new LoadInst(live->getType(), v, Twine(live->getName(), name),
insert);
}
mapped = v;
return v;
} else if (auto *I = dyn_cast<Instruction>(live)) {
if (!reuse || !mapped) {
auto *clone = I->clone();
clone->setName(I->getName());
clone->insertBefore(insert);
if (insert_point == insert) {
insert_point = clone;
}
insert = clone;
mapped = clone;
I = clone;
} else {
return mapped;
}

for (auto op_it = I->op_begin(); op_it != I->op_end();) {
auto &op = *op_it++;
if (auto *op_inst = dyn_cast<Instruction>(op.get())) {
op.set(getReload(op_inst, insert, name, reuse));
}
}
return I;
}
return live;
}

} live_values(*this, insert_point,
hasBarrierStruct ? compiler::utils::getLastArgument(new_kernel)
: nullptr);
LiveValuesHelper live_values(
*this, insert_point,
hasBarrierStruct ? compiler::utils::getLastArgument(new_kernel)
: nullptr);

// Load live variables and map them.
// These variables are defined in a different kernel, so we insert the
Expand Down
Loading

0 comments on commit 44b9ea9

Please sign in to comment.