From a2e1ac39367250ac598a5267df6ddc401e8a1a0a Mon Sep 17 00:00:00 2001 From: Amy Date: Mon, 3 Jul 2023 13:23:27 +0100 Subject: [PATCH 1/5] Retrieve values from barrier struct for use in new memory barriers where necessary --- .../include/compiler/utils/barrier_regions.h | 21 ++ .../compiler/utils/source/barrier_regions.cpp | 245 +++++++++--------- .../utils/source/handle_barriers_pass.cpp | 31 ++- 3 files changed, 169 insertions(+), 128 deletions(-) diff --git a/modules/compiler/utils/include/compiler/utils/barrier_regions.h b/modules/compiler/utils/include/compiler/utils/barrier_regions.h index 9ab38f846..0cd6d8d07 100644 --- a/modules/compiler/utils/include/compiler/utils/barrier_regions.h +++ b/modules/compiler/utils/include/compiler/utils/barrier_regions.h @@ -157,6 +157,27 @@ class Barrier { llvm::Function &getFunc() { return func_; } const llvm::Function &getFunc() const { return func_; } + /// @brief struct to help retrieval of values from the barrier struct + struct LiveValuesHelper { + Barrier &barrier; + llvm::DenseMap live_GEPs; + llvm::DenseMap reloads; + llvm::Instruction *insert_point = nullptr; + llvm::Value *barrier_struct = nullptr; + llvm::Value *vscale = nullptr; + + LiveValuesHelper(Barrier &b, llvm::Instruction *i, llvm::Value *s) + : barrier(b), insert_point(i), barrier_struct(s) {} + + /// @brief get a GEP instruction pointing to the given value in the barrier + /// struct. + llvm::Value *getGEP(const llvm::Value *live); + + /// @brief get a value reloaded from the barrier struct. + llvm::Value *getReload(llvm::Value *live, llvm::Instruction *insert, + const char *name, bool reuse = false); + }; + private: /// @brief The first is set for livein and the second is set for liveout using live_in_out_t = diff --git a/modules/compiler/utils/source/barrier_regions.cpp b/modules/compiler/utils/source/barrier_regions.cpp index ad3ed11cf..0494e15ba 100644 --- a/modules/compiler/utils/source/barrier_regions.cpp +++ b/modules/compiler/utils/source/barrier_regions.cpp @@ -264,6 +264,120 @@ void UpdateAndTrimPHINodeEdges(BasicBlock *BB, ValueToValueMapTy &vmap) { } // namespace +Value *compiler::utils::Barrier::LiveValuesHelper::getGEP(const Value *live) { + auto gep_it = live_GEPs.find(live); + if (gep_it != live_GEPs.end()) { + return gep_it->second; + } + + Value *gep; + Type *data_ty = live->getType(); + if (auto *AI = dyn_cast(live)) { + data_ty = AI->getAllocatedType(); + } + + if (!isa(data_ty)) { + auto field_it = barrier.live_variable_index_map_.find(live); + if (field_it == barrier.live_variable_index_map_.end()) { + return nullptr; + } + + LLVMContext &context = barrier.module_.getContext(); + unsigned field_index = field_it->second; + Value *live_variable_info_idxs[2] = { + ConstantInt::get(Type::getInt32Ty(context), 0), + ConstantInt::get(Type::getInt32Ty(context), field_index)}; + + gep = GetElementPtrInst::Create( + barrier.live_var_mem_ty_, barrier_struct, live_variable_info_idxs, + Twine("live_gep_") + live->getName(), insert_point); + } else { + auto field_it = barrier.live_variable_scalables_map_.find(live); + if (field_it == barrier.live_variable_scalables_map_.end()) { + return nullptr; + } + unsigned const field_offset = field_it->second; + Value *scaled_offset = nullptr; + + LLVMContext &context = barrier.module_.getContext(); + IRBuilder<> B(insert_point); + if (field_offset != 0) { + if (!vscale) { + Type *size_type = B.getIntNTy(barrier.size_t_bytes * 8); + vscale = B.CreateIntrinsic(Intrinsic::vscale, size_type, {}); + } + scaled_offset = B.CreateMul( + vscale, B.getIntN(barrier.size_t_bytes * 8, field_offset)); + } else { + scaled_offset = ConstantInt::get(Type::getInt32Ty(context), 0); + } + + Value *live_variable_info_idxs[3] = { + ConstantInt::get(Type::getInt32Ty(context), 0), + ConstantInt::get(Type::getInt32Ty(context), + barrier.live_var_mem_scalables_index), + scaled_offset, + }; + + // Gep into the raw byte buffer + gep = B.CreateInBoundsGEP(barrier.live_var_mem_ty_, barrier_struct, + live_variable_info_idxs, + Twine("live_gep_scalable_") + live->getName()); + + // Cast the pointer to the scalable vector type + gep = B.CreatePointerCast( + gep, + PointerType::get( + data_ty, + cast(barrier_struct->getType())->getAddressSpace())); + } + + live_GEPs.insert(std::make_pair(live, gep)); + return gep; +} + +Value *compiler::utils::Barrier::LiveValuesHelper::getReload( + Value *live, Instruction *insert, const char *name, bool reuse) { + auto &mapped = reloads[live]; + if (reuse && mapped) { + return mapped; + } + + Value *v = getGEP(live); + if (v) { + if (!isa(live)) { + // If live variable is not allocainst, insert load. + v = new LoadInst(live->getType(), v, Twine(live->getName(), name), + insert); + } + mapped = v; + return v; + } else if (auto *I = dyn_cast(live)) { + if (!reuse || !mapped) { + auto *clone = I->clone(); + clone->setName(I->getName()); + clone->insertBefore(insert); + if (insert_point == insert) { + insert_point = clone; + } + insert = clone; + mapped = clone; + I = clone; + } else { + return mapped; + } + + for (auto op_it = I->op_begin(); op_it != I->op_end();) { + auto &op = *op_it++; + if (auto *op_inst = dyn_cast(op.get())) { + op.set(getReload(op_inst, insert, name, reuse)); + } + } + return I; + } + return live; +} + void compiler::utils::Barrier::Run(llvm::ModuleAnalysisManager &mam) { bi_ = &mam.getResult(module_); FindBarriers(); @@ -962,133 +1076,10 @@ Function *compiler::utils::Barrier::GenerateNewKernel(BarrierRegion ®ion) { } // It puts all the GEPs at the start of the kernel, but only once - struct live_values_helper { - Barrier &barrier; - DenseMap live_GEPs; - DenseMap reloads; - Instruction *insert_point = nullptr; - Value *barrier_struct = nullptr; - Value *vscale = nullptr; - - live_values_helper(Barrier &b, Instruction *i, Value *s) - : barrier(b), insert_point(i), barrier_struct(s) {} - - Value *getGEP(const Value *live) { - auto gep_it = live_GEPs.find(live); - if (gep_it != live_GEPs.end()) { - return gep_it->second; - } - - Value *gep; - Type *data_ty = live->getType(); - if (auto *AI = dyn_cast(live)) { - data_ty = AI->getAllocatedType(); - } - - if (!isa(data_ty)) { - auto field_it = barrier.live_variable_index_map_.find(live); - if (field_it == barrier.live_variable_index_map_.end()) { - return nullptr; - } - - LLVMContext &context = barrier.module_.getContext(); - unsigned field_index = field_it->second; - Value *live_variable_info_idxs[2] = { - ConstantInt::get(Type::getInt32Ty(context), 0), - ConstantInt::get(Type::getInt32Ty(context), field_index)}; - - gep = GetElementPtrInst::Create( - barrier.live_var_mem_ty_, barrier_struct, live_variable_info_idxs, - Twine("live_gep_") + live->getName(), insert_point); - } else { - auto field_it = barrier.live_variable_scalables_map_.find(live); - if (field_it == barrier.live_variable_scalables_map_.end()) { - return nullptr; - } - unsigned const field_offset = field_it->second; - Value *scaled_offset = nullptr; - - LLVMContext &context = barrier.module_.getContext(); - IRBuilder<> B(insert_point); - if (field_offset != 0) { - if (!vscale) { - Type *size_type = B.getIntNTy(barrier.size_t_bytes * 8); - vscale = B.CreateIntrinsic(Intrinsic::vscale, size_type, {}); - } - scaled_offset = B.CreateMul( - vscale, B.getIntN(barrier.size_t_bytes * 8, field_offset)); - } else { - scaled_offset = ConstantInt::get(Type::getInt32Ty(context), 0); - } - - Value *live_variable_info_idxs[3] = { - ConstantInt::get(Type::getInt32Ty(context), 0), - ConstantInt::get(Type::getInt32Ty(context), - barrier.live_var_mem_scalables_index), - scaled_offset, - }; - - // Gep into the raw byte buffer - gep = B.CreateInBoundsGEP( - barrier.live_var_mem_ty_, barrier_struct, live_variable_info_idxs, - Twine("live_gep_scalable_") + live->getName()); - - // Cast the pointer to the scalable vector type - gep = B.CreatePointerCast( - gep, PointerType::get(data_ty, - cast(barrier_struct->getType()) - ->getAddressSpace())); - } - - live_GEPs.insert(std::make_pair(live, gep)); - return gep; - } - - Value *getReload(Value *live, Instruction *insert, const char *name, - bool reuse = false) { - auto &mapped = reloads[live]; - if (reuse && mapped) { - return mapped; - } - - Value *v = getGEP(live); - if (v) { - if (!isa(live)) { - // If live variable is not allocainst, insert load. - v = new LoadInst(live->getType(), v, Twine(live->getName(), name), - insert); - } - mapped = v; - return v; - } else if (auto *I = dyn_cast(live)) { - if (!reuse || !mapped) { - auto *clone = I->clone(); - clone->setName(I->getName()); - clone->insertBefore(insert); - if (insert_point == insert) { - insert_point = clone; - } - insert = clone; - mapped = clone; - I = clone; - } else { - return mapped; - } - - for (auto op_it = I->op_begin(); op_it != I->op_end();) { - auto &op = *op_it++; - if (auto *op_inst = dyn_cast(op.get())) { - op.set(getReload(op_inst, insert, name, reuse)); - } - } - return I; - } - return live; - } - - } live_values(*this, insert_point, - hasBarrierStruct ? compiler::utils::getLastArgument(new_kernel) - : nullptr); + LiveValuesHelper live_values( + *this, insert_point, + hasBarrierStruct ? compiler::utils::getLastArgument(new_kernel) + : nullptr); // Load live variables and map them. // These variables are defined in a different kernel, so we insert the diff --git a/modules/compiler/utils/source/handle_barriers_pass.cpp b/modules/compiler/utils/source/handle_barriers_pass.cpp index 0a840f2bc..7d9c8d633 100644 --- a/modules/compiler/utils/source/handle_barriers_pass.cpp +++ b/modules/compiler/utils/source/handle_barriers_pass.cpp @@ -1143,7 +1143,36 @@ Function *compiler::utils::HandleBarriersPass::makeWrapperFunction( auto *MemBarrier = BI.getOrDeclareMuxBuiltin(eMuxBuiltinMemBarrier, M); assert(MemBarrier); Value *Ops[2] = {CI->getOperand(1), CI->getOperand(2)}; - auto *Call = B.CreateCall(MemBarrier, Ops); + + auto *const Call = B.CreateCall(MemBarrier, Ops); + + // Patch up any operands that were non-constants by fetching them from + // the barrier struct. We do this after creating the call because we + // need an instruction to function as an insert point. + if (!isa(Ops[0]) || !isa(Ops[1])) { + dbgs() << *Call << "\n"; + // We expect these values to be uniform so it should be safe to get + // from the barrier struct at index zero. Barriers are convergent, so + // there should be no chance that the value does not exist. + auto *const zero = + Constant::getNullValue(Type::getIntNTy(context, 8 * sizeTyBytes)); + IRBuilder<> ir(Call); + auto *const barrier0 = ir.CreateInBoundsGEP( + barrierMain.getLiveVarsType(), barrierMain.getMemSpace(), {zero}); + + Barrier::LiveValuesHelper live_values(barrierMain, Call, barrier0); + + size_t op_index = 0; + for (auto *const op : Ops) { + if (!isa(op)) { + auto *const new_op = + live_values.getReload(op, Call, "_load", true); + Call->setArgOperand(op_index, new_op); + } + ++op_index; + } + } + Call->setDebugLoc(wrapperDbgLoc); } From ee165bb4430a81dfbfdd2c75d351de1c1b8871c9 Mon Sep 17 00:00:00 2001 From: Amy Date: Mon, 3 Jul 2023 15:42:49 +0100 Subject: [PATCH 2/5] Code review --- .../utils/include/compiler/utils/barrier_regions.h | 6 ++++++ modules/compiler/utils/source/barrier_regions.cpp | 7 ++++--- modules/compiler/utils/source/handle_barriers_pass.cpp | 3 +-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/modules/compiler/utils/include/compiler/utils/barrier_regions.h b/modules/compiler/utils/include/compiler/utils/barrier_regions.h index 0cd6d8d07..c0c935a0d 100644 --- a/modules/compiler/utils/include/compiler/utils/barrier_regions.h +++ b/modules/compiler/utils/include/compiler/utils/barrier_regions.h @@ -174,6 +174,12 @@ class Barrier { llvm::Value *getGEP(const llvm::Value *live); /// @brief get a value reloaded from the barrier struct. + /// + /// @param[in] live the live value to retrieve from the barrier + /// @param[in] insert where to insert new instructions + /// @param[in] name a postfix to append to new value names + /// @param[in] reuse whether to generate the load for a given value only + /// once, returning the previously cached value on further requests. llvm::Value *getReload(llvm::Value *live, llvm::Instruction *insert, const char *name, bool reuse = false); }; diff --git a/modules/compiler/utils/source/barrier_regions.cpp b/modules/compiler/utils/source/barrier_regions.cpp index 0494e15ba..da36afb50 100644 --- a/modules/compiler/utils/source/barrier_regions.cpp +++ b/modules/compiler/utils/source/barrier_regions.cpp @@ -343,8 +343,7 @@ Value *compiler::utils::Barrier::LiveValuesHelper::getReload( return mapped; } - Value *v = getGEP(live); - if (v) { + if (Value *v = getGEP(live)) { if (!isa(live)) { // If live variable is not allocainst, insert load. v = new LoadInst(live->getType(), v, Twine(live->getName(), name), @@ -352,7 +351,9 @@ Value *compiler::utils::Barrier::LiveValuesHelper::getReload( } mapped = v; return v; - } else if (auto *I = dyn_cast(live)) { + } + + if (auto *I = dyn_cast(live)) { if (!reuse || !mapped) { auto *clone = I->clone(); clone->setName(I->getName()); diff --git a/modules/compiler/utils/source/handle_barriers_pass.cpp b/modules/compiler/utils/source/handle_barriers_pass.cpp index 7d9c8d633..007e79fcd 100644 --- a/modules/compiler/utils/source/handle_barriers_pass.cpp +++ b/modules/compiler/utils/source/handle_barriers_pass.cpp @@ -1150,7 +1150,6 @@ Function *compiler::utils::HandleBarriersPass::makeWrapperFunction( // the barrier struct. We do this after creating the call because we // need an instruction to function as an insert point. if (!isa(Ops[0]) || !isa(Ops[1])) { - dbgs() << *Call << "\n"; // We expect these values to be uniform so it should be safe to get // from the barrier struct at index zero. Barriers are convergent, so // there should be no chance that the value does not exist. @@ -1166,7 +1165,7 @@ Function *compiler::utils::HandleBarriersPass::makeWrapperFunction( for (auto *const op : Ops) { if (!isa(op)) { auto *const new_op = - live_values.getReload(op, Call, "_load", true); + live_values.getReload(op, Call, "_load", /*reuse*/ true); Call->setArgOperand(op_index, new_op); } ++op_index; From 45176078602684543adc75d071624ae4a431a926 Mon Sep 17 00:00:00 2001 From: Amy Date: Wed, 5 Jul 2023 14:15:11 +0100 Subject: [PATCH 3/5] Fix segfault revealed during testing --- .../compiler/utils/source/barrier_regions.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/modules/compiler/utils/source/barrier_regions.cpp b/modules/compiler/utils/source/barrier_regions.cpp index da36afb50..64c754f07 100644 --- a/modules/compiler/utils/source/barrier_regions.cpp +++ b/modules/compiler/utils/source/barrier_regions.cpp @@ -432,14 +432,16 @@ void compiler::utils::Barrier::FindBarriers() { // Check call instructions for barrier. if (CallInst *call_inst = dyn_cast(&bi)) { Function *callee = call_inst->getCalledFunction(); - auto const B = bi_->analyzeBuiltin(*callee); - if (callee != nullptr && isMuxWGControlBarrierID(B.ID)) { - unsigned id = ~0u; - auto *const id_param = call_inst->getOperand(0); - if (auto *const id_param_c = dyn_cast(id_param)) { - id = id_param_c->getZExtValue(); + if (callee != nullptr) { + auto const B = bi_->analyzeBuiltin(*callee); + if (isMuxWGControlBarrierID(B.ID)) { + unsigned id = ~0u; + auto *const id_param = call_inst->getOperand(0); + if (auto *const id_param_c = dyn_cast(id_param)) { + id = id_param_c->getZExtValue(); + } + orderedBarriers.emplace_back(id, call_inst); } - orderedBarriers.emplace_back(id, call_inst); } } } @@ -1062,6 +1064,7 @@ Function *compiler::utils::Barrier::GenerateNewKernel(BarrierRegion ®ion) { if (CallInst *call_inst = dyn_cast(insert_point)) { Function *callee = call_inst->getCalledFunction(); + dbgs() << *call_inst << "\n"; assert(callee && "Could not get called function"); auto const B = bi_->analyzeBuiltin(*callee); if (isMuxWGControlBarrierID(B.ID)) { From bc6d795773b2f4801b5db67f42a047864cbe83c7 Mon Sep 17 00:00:00 2001 From: Amy Date: Wed, 5 Jul 2023 14:15:39 +0100 Subject: [PATCH 4/5] Add Lit test to check-passes-lit target. --- .../passes/barriers-controlled-mem-fence.ll | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 modules/compiler/test/lit/passes/barriers-controlled-mem-fence.ll diff --git a/modules/compiler/test/lit/passes/barriers-controlled-mem-fence.ll b/modules/compiler/test/lit/passes/barriers-controlled-mem-fence.ll new file mode 100644 index 000000000..a2fd877ca --- /dev/null +++ b/modules/compiler/test/lit/passes/barriers-controlled-mem-fence.ll @@ -0,0 +1,48 @@ +; Copyright (C) Codeplay Software Limited +; +; Licensed under the Apache License, Version 2.0 (the "License") with LLVM +; Exceptions; you may not use this file except in compliance with the License. +; You may obtain a copy of the License at +; +; https://github.com/codeplaysoftware/oneapi-construction-kit/blob/main/LICENSE.txt +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +; WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +; License for the specific language governing permissions and limitations +; under the License. +; +; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +; RUN: muxc --passes barriers-pass,verify -S %s | FileCheck %s + +; It checks to make sure that the variable used by the barrier is reloaded from +; the barrier struct for use in the newly-created Memory Barrier call in the +; wrapper function. + +; CHECK-LABEL: sw.bb2: +; CHECK-NEXT: %[[GEP:.+]] = getelementptr inbounds %test_fence_live_mem_info, ptr %live_variables, i64 0 +; CHECK-NEXT: %live_gep_secret = getelementptr %test_fence_live_mem_info, ptr %[[GEP]], i32 0, i32 0 +; CHECK-NEXT: %secret_load = load i32, ptr %live_gep_secret, align 4 +; CHECK-NEXT: call void @__mux_mem_barrier(i32 %secret_load, i32 912) + +; ModuleID = 'SPIR-V' +source_filename = "SPIR-V" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-unknown-elf" + +; Function Attrs: convergent nounwind +define internal void @test_fence(ptr addrspace(3) nocapture %out) local_unnamed_addr #0 { +entry: + call void @mysterious_side_effect(ptr addrspace(3) %out) + %secret = call i32 @mysterious_secret_generator(i32 0) + tail call void @__mux_work_group_barrier(i32 0, i32 %secret, i32 912) + call void @mysterious_side_effect(ptr addrspace(3) %out) + ret void +} + +declare i32 @mysterious_secret_generator(i32) +declare void @mysterious_side_effect(ptr addrspace(3)) +declare void @__mux_work_group_barrier(i32, i32, i32) + +attributes #0 = { "mux-kernel"="entry-point" } From bef5587652d5ed070280bd5447865156d280d602 Mon Sep 17 00:00:00 2001 From: Amy Date: Wed, 5 Jul 2023 14:56:49 +0100 Subject: [PATCH 5/5] Oops, remove debug print --- modules/compiler/utils/source/barrier_regions.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/compiler/utils/source/barrier_regions.cpp b/modules/compiler/utils/source/barrier_regions.cpp index 64c754f07..d2d4af978 100644 --- a/modules/compiler/utils/source/barrier_regions.cpp +++ b/modules/compiler/utils/source/barrier_regions.cpp @@ -1064,7 +1064,6 @@ Function *compiler::utils::Barrier::GenerateNewKernel(BarrierRegion ®ion) { if (CallInst *call_inst = dyn_cast(insert_point)) { Function *callee = call_inst->getCalledFunction(); - dbgs() << *call_inst << "\n"; assert(callee && "Could not get called function"); auto const B = bi_->analyzeBuiltin(*callee); if (isMuxWGControlBarrierID(B.ID)) {