-
Notifications
You must be signed in to change notification settings - Fork 15.6k
[NVPTX] Add missing type suffixes for barrier.cta.red #172945
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-backend-nvptx
Author: Alex MacLean (AlexMaclean)
Changes
Full diff: https://github.com/llvm/llvm-project/pull/172945.diff
2 Files Affected:
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index b145e1d53f46c..0303fd15a001a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -151,19 +151,19 @@ defm BARRIER_CTA_SYNC_ALL : BARRIER_ALL<"barrier.sync", int_nvvm_barrier_cta_syn
defm BARRIER_CTA_SYNC : BARRIER_COUNT<"barrier.sync", int_nvvm_barrier_cta_sync_count, [hasPTX<60>]>;
defm BARRIER_CTA_ARRIVE : BARRIER_COUNT<"barrier.arrive", int_nvvm_barrier_cta_arrive_count, [hasPTX<60>]>;
-defm BARRIER_CTA_RED_POPC_ALIGNED_ALL : BARRIER_RED_ALL<"bar.red.popc", int_nvvm_barrier_cta_red_popc_aligned_all, I32RT>;
-defm BARRIER_CTA_RED_AND_ALIGNED_ALL : BARRIER_RED_ALL<"bar.red.and", int_nvvm_barrier_cta_red_and_aligned_all, I1RT>;
-defm BARRIER_CTA_RED_OR_ALIGNED_ALL : BARRIER_RED_ALL<"bar.red.or", int_nvvm_barrier_cta_red_or_aligned_all, I1RT>;
-defm BARRIER_CTA_RED_POPC_ALIGNED : BARRIER_RED_COUNT<"bar.red.popc", int_nvvm_barrier_cta_red_popc_aligned_count, I32RT>;
-defm BARRIER_CTA_RED_AND_ALIGNED : BARRIER_RED_COUNT<"bar.red.and", int_nvvm_barrier_cta_red_and_aligned_count, I1RT>;
-defm BARRIER_CTA_RED_OR_ALIGNED : BARRIER_RED_COUNT<"bar.red.or", int_nvvm_barrier_cta_red_or_aligned_count, I1RT>;
-
-defm BARRIER_CTA_RED_POPC_ALL : BARRIER_RED_ALL<"barrier.red.popc", int_nvvm_barrier_cta_red_popc_all, I32RT, [hasPTX<60>]>;
-defm BARRIER_CTA_RED_AND_ALL : BARRIER_RED_ALL<"barrier.red.and", int_nvvm_barrier_cta_red_and_all, I1RT, [hasPTX<60>]>;
-defm BARRIER_CTA_RED_OR_ALL : BARRIER_RED_ALL<"barrier.red.or", int_nvvm_barrier_cta_red_or_all, I1RT, [hasPTX<60>]>;
-defm BARRIER_CTA_RED_POPC_COUNT : BARRIER_RED_COUNT<"barrier.red.popc", int_nvvm_barrier_cta_red_popc_count, I32RT, [hasPTX<60>]>;
-defm BARRIER_CTA_RED_AND_COUNT : BARRIER_RED_COUNT<"barrier.red.and", int_nvvm_barrier_cta_red_and_count, I1RT, [hasPTX<60>]>;
-defm BARRIER_CTA_RED_OR_COUNT : BARRIER_RED_COUNT<"barrier.red.or", int_nvvm_barrier_cta_red_or_count, I1RT, [hasPTX<60>]>;
+defm BARRIER_CTA_RED_POPC_ALIGNED_ALL : BARRIER_RED_ALL<"bar.red.popc.u32", int_nvvm_barrier_cta_red_popc_aligned_all, I32RT>;
+defm BARRIER_CTA_RED_AND_ALIGNED_ALL : BARRIER_RED_ALL<"bar.red.and.pred", int_nvvm_barrier_cta_red_and_aligned_all, I1RT>;
+defm BARRIER_CTA_RED_OR_ALIGNED_ALL : BARRIER_RED_ALL<"bar.red.or.pred", int_nvvm_barrier_cta_red_or_aligned_all, I1RT>;
+defm BARRIER_CTA_RED_POPC_ALIGNED : BARRIER_RED_COUNT<"bar.red.popc.u32", int_nvvm_barrier_cta_red_popc_aligned_count, I32RT>;
+defm BARRIER_CTA_RED_AND_ALIGNED : BARRIER_RED_COUNT<"bar.red.and.pred", int_nvvm_barrier_cta_red_and_aligned_count, I1RT>;
+defm BARRIER_CTA_RED_OR_ALIGNED : BARRIER_RED_COUNT<"bar.red.or.pred", int_nvvm_barrier_cta_red_or_aligned_count, I1RT>;
+
+defm BARRIER_CTA_RED_POPC_ALL : BARRIER_RED_ALL<"barrier.red.popc.u32", int_nvvm_barrier_cta_red_popc_all, I32RT, [hasPTX<60>]>;
+defm BARRIER_CTA_RED_AND_ALL : BARRIER_RED_ALL<"barrier.red.and.pred", int_nvvm_barrier_cta_red_and_all, I1RT, [hasPTX<60>]>;
+defm BARRIER_CTA_RED_OR_ALL : BARRIER_RED_ALL<"barrier.red.or.pred", int_nvvm_barrier_cta_red_or_all, I1RT, [hasPTX<60>]>;
+defm BARRIER_CTA_RED_POPC_COUNT : BARRIER_RED_COUNT<"barrier.red.popc.u32", int_nvvm_barrier_cta_red_popc_count, I32RT, [hasPTX<60>]>;
+defm BARRIER_CTA_RED_AND_COUNT : BARRIER_RED_COUNT<"barrier.red.and.pred", int_nvvm_barrier_cta_red_and_count, I1RT, [hasPTX<60>]>;
+defm BARRIER_CTA_RED_OR_COUNT : BARRIER_RED_COUNT<"barrier.red.or.pred", int_nvvm_barrier_cta_red_or_count, I1RT, [hasPTX<60>]>;
class INT_BARRIER_CLUSTER<string variant, Intrinsic Intr,
list<Predicate> Preds = [hasPTX<78>, hasSM<90>]>:
diff --git a/llvm/test/CodeGen/NVPTX/barrier.ll b/llvm/test/CodeGen/NVPTX/barrier.ll
index c785f09fcf87e..957a3ab59b591 100644
--- a/llvm/test/CodeGen/NVPTX/barrier.ll
+++ b/llvm/test/CodeGen/NVPTX/barrier.ll
@@ -147,10 +147,10 @@ define void @barrier_cta_red_popc_all(i32 %id, i1 %pred) {
; CHECK-NEXT: and.b16 %rs2, %rs1, 1;
; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0;
; CHECK-NEXT: ld.param.b32 %r1, [barrier_cta_red_popc_all_param_0];
-; CHECK-NEXT: bar.red.popc %r2, %r1, %p1;
-; CHECK-NEXT: bar.red.popc %r3, 3, %p1;
-; CHECK-NEXT: barrier.red.popc %r4, %r1, %p1;
-; CHECK-NEXT: barrier.red.popc %r5, 3, %p1;
+; CHECK-NEXT: bar.red.popc.u32 %r2, %r1, %p1;
+; CHECK-NEXT: bar.red.popc.u32 %r3, 3, %p1;
+; CHECK-NEXT: barrier.red.popc.u32 %r4, %r1, %p1;
+; CHECK-NEXT: barrier.red.popc.u32 %r5, 3, %p1;
; CHECK-NEXT: ret;
%v1 = call i32 @llvm.nvvm.barrier.cta.red.popc.aligned.all(i32 %id, i1 %pred)
%v2 = call i32 @llvm.nvvm.barrier.cta.red.popc.aligned.all(i32 3, i1 %pred)
@@ -172,24 +172,24 @@ define void @barrier_cta_red_popc_count(i32 %id, i32 %cnt, i1 %pred) {
; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0;
; CHECK-NEXT: ld.param.b32 %r1, [barrier_cta_red_popc_count_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [barrier_cta_red_popc_count_param_1];
-; CHECK-NEXT: bar.red.popc %r3, %r1, %r2, %p1;
-; CHECK-NEXT: bar.red.popc %r4, 3, %r2, %p1;
-; CHECK-NEXT: barrier.red.popc %r5, %r1, %r2, %p1;
-; CHECK-NEXT: barrier.red.popc %r6, 3, %r2, %p1;
-; CHECK-NEXT: bar.red.popc %r7, %r1, 10, %p1;
-; CHECK-NEXT: bar.red.popc %r8, 3, 11, %p1;
-; CHECK-NEXT: barrier.red.popc %r9, %r1, 12, %p1;
-; CHECK-NEXT: barrier.red.popc %r10, 3, 13, %p1;
+; CHECK-NEXT: bar.red.popc.u32 %r3, %r1, %r2, %p1;
+; CHECK-NEXT: bar.red.popc.u32 %r4, 3, %r2, %p1;
+; CHECK-NEXT: barrier.red.popc.u32 %r5, %r1, %r2, %p1;
+; CHECK-NEXT: barrier.red.popc.u32 %r6, 3, %r2, %p1;
+; CHECK-NEXT: bar.red.popc.u32 %r7, %r1, 64, %p1;
+; CHECK-NEXT: bar.red.popc.u32 %r8, 3, 64, %p1;
+; CHECK-NEXT: barrier.red.popc.u32 %r9, %r1, 64, %p1;
+; CHECK-NEXT: barrier.red.popc.u32 %r10, 3, 64, %p1;
; CHECK-NEXT: ret;
%v1 = call i32 @llvm.nvvm.barrier.cta.red.popc.aligned.count(i32 %id, i32 %cnt, i1 %pred)
%v2 = call i32 @llvm.nvvm.barrier.cta.red.popc.aligned.count(i32 3, i32 %cnt, i1 %pred)
%v3 = call i32 @llvm.nvvm.barrier.cta.red.popc.count(i32 %id, i32 %cnt, i1 %pred)
%v4 = call i32 @llvm.nvvm.barrier.cta.red.popc.count(i32 3, i32 %cnt, i1 %pred)
- %v5 = call i32 @llvm.nvvm.barrier.cta.red.popc.aligned.count(i32 %id, i32 10, i1 %pred)
- %v6 = call i32 @llvm.nvvm.barrier.cta.red.popc.aligned.count(i32 3, i32 11, i1 %pred)
- %v7 = call i32 @llvm.nvvm.barrier.cta.red.popc.count(i32 %id, i32 12, i1 %pred)
- %v8 = call i32 @llvm.nvvm.barrier.cta.red.popc.count(i32 3, i32 13, i1 %pred)
+ %v5 = call i32 @llvm.nvvm.barrier.cta.red.popc.aligned.count(i32 %id, i32 64, i1 %pred)
+ %v6 = call i32 @llvm.nvvm.barrier.cta.red.popc.aligned.count(i32 3, i32 64, i1 %pred)
+ %v7 = call i32 @llvm.nvvm.barrier.cta.red.popc.count(i32 %id, i32 64, i1 %pred)
+ %v8 = call i32 @llvm.nvvm.barrier.cta.red.popc.count(i32 3, i32 64, i1 %pred)
ret void
}
@@ -205,10 +205,10 @@ define void @barrier_cta_red_and_all(i32 %id, i1 %pred) {
; CHECK-NEXT: and.b16 %rs2, %rs1, 1;
; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0;
; CHECK-NEXT: ld.param.b32 %r1, [barrier_cta_red_and_all_param_0];
-; CHECK-NEXT: bar.red.and %p2, %r1, %p1;
-; CHECK-NEXT: bar.red.and %p3, 3, %p1;
-; CHECK-NEXT: barrier.red.and %p4, %r1, %p1;
-; CHECK-NEXT: barrier.red.and %p5, 3, %p1;
+; CHECK-NEXT: bar.red.and.pred %p2, %r1, %p1;
+; CHECK-NEXT: bar.red.and.pred %p3, 3, %p1;
+; CHECK-NEXT: barrier.red.and.pred %p4, %r1, %p1;
+; CHECK-NEXT: barrier.red.and.pred %p5, 3, %p1;
; CHECK-NEXT: ret;
%v1 = call i1 @llvm.nvvm.barrier.cta.red.and.aligned.all(i32 %id, i1 %pred)
%v2 = call i1 @llvm.nvvm.barrier.cta.red.and.aligned.all(i32 3, i1 %pred)
@@ -230,24 +230,24 @@ define void @barrier_cta_red_and_count(i32 %id, i32 %cnt, i1 %pred) {
; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0;
; CHECK-NEXT: ld.param.b32 %r1, [barrier_cta_red_and_count_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [barrier_cta_red_and_count_param_1];
-; CHECK-NEXT: bar.red.and %p2, %r1, %r2, %p1;
-; CHECK-NEXT: bar.red.and %p3, 3, %r2, %p1;
-; CHECK-NEXT: barrier.red.and %p4, %r1, %r2, %p1;
-; CHECK-NEXT: barrier.red.and %p5, 3, %r2, %p1;
-; CHECK-NEXT: bar.red.and %p6, %r1, 10, %p1;
-; CHECK-NEXT: bar.red.and %p7, 3, 11, %p1;
-; CHECK-NEXT: barrier.red.and %p8, %r1, 12, %p1;
-; CHECK-NEXT: barrier.red.and %p9, 3, 13, %p1;
+; CHECK-NEXT: bar.red.and.pred %p2, %r1, %r2, %p1;
+; CHECK-NEXT: bar.red.and.pred %p3, 3, %r2, %p1;
+; CHECK-NEXT: barrier.red.and.pred %p4, %r1, %r2, %p1;
+; CHECK-NEXT: barrier.red.and.pred %p5, 3, %r2, %p1;
+; CHECK-NEXT: bar.red.and.pred %p6, %r1, 64, %p1;
+; CHECK-NEXT: bar.red.and.pred %p7, 3, 32, %p1;
+; CHECK-NEXT: barrier.red.and.pred %p8, %r1, 64, %p1;
+; CHECK-NEXT: barrier.red.and.pred %p9, 3, 64, %p1;
; CHECK-NEXT: ret;
%v1 = call i1 @llvm.nvvm.barrier.cta.red.and.aligned.count(i32 %id, i32 %cnt, i1 %pred)
%v2 = call i1 @llvm.nvvm.barrier.cta.red.and.aligned.count(i32 3, i32 %cnt, i1 %pred)
%v3 = call i1 @llvm.nvvm.barrier.cta.red.and.count(i32 %id, i32 %cnt, i1 %pred)
%v4 = call i1 @llvm.nvvm.barrier.cta.red.and.count(i32 3, i32 %cnt, i1 %pred)
- %v5 = call i1 @llvm.nvvm.barrier.cta.red.and.aligned.count(i32 %id, i32 10, i1 %pred)
- %v6 = call i1 @llvm.nvvm.barrier.cta.red.and.aligned.count(i32 3, i32 11, i1 %pred)
- %v7 = call i1 @llvm.nvvm.barrier.cta.red.and.count(i32 %id, i32 12, i1 %pred)
- %v8 = call i1 @llvm.nvvm.barrier.cta.red.and.count(i32 3, i32 13, i1 %pred)
+ %v5 = call i1 @llvm.nvvm.barrier.cta.red.and.aligned.count(i32 %id, i32 64, i1 %pred)
+ %v6 = call i1 @llvm.nvvm.barrier.cta.red.and.aligned.count(i32 3, i32 32, i1 %pred)
+ %v7 = call i1 @llvm.nvvm.barrier.cta.red.and.count(i32 %id, i32 64, i1 %pred)
+ %v8 = call i1 @llvm.nvvm.barrier.cta.red.and.count(i32 3, i32 64, i1 %pred)
ret void
}
@@ -263,10 +263,10 @@ define void @barrier_cta_red_or_all(i32 %id, i1 %pred) {
; CHECK-NEXT: and.b16 %rs2, %rs1, 1;
; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0;
; CHECK-NEXT: ld.param.b32 %r1, [barrier_cta_red_or_all_param_0];
-; CHECK-NEXT: bar.red.or %p2, %r1, %p1;
-; CHECK-NEXT: bar.red.or %p3, 3, %p1;
-; CHECK-NEXT: barrier.red.or %p4, %r1, %p1;
-; CHECK-NEXT: barrier.red.or %p5, 3, %p1;
+; CHECK-NEXT: bar.red.or.pred %p2, %r1, %p1;
+; CHECK-NEXT: bar.red.or.pred %p3, 3, %p1;
+; CHECK-NEXT: barrier.red.or.pred %p4, %r1, %p1;
+; CHECK-NEXT: barrier.red.or.pred %p5, 3, %p1;
; CHECK-NEXT: ret;
%v1 = call i1 @llvm.nvvm.barrier.cta.red.or.aligned.all(i32 %id, i1 %pred)
%v2 = call i1 @llvm.nvvm.barrier.cta.red.or.aligned.all(i32 3, i1 %pred)
@@ -288,23 +288,23 @@ define void @barrier_cta_red_or_count(i32 %id, i32 %cnt, i1 %pred) {
; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0;
; CHECK-NEXT: ld.param.b32 %r1, [barrier_cta_red_or_count_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [barrier_cta_red_or_count_param_1];
-; CHECK-NEXT: bar.red.or %p2, %r1, %r2, %p1;
-; CHECK-NEXT: bar.red.or %p3, 3, %r2, %p1;
-; CHECK-NEXT: barrier.red.or %p4, %r1, %r2, %p1;
-; CHECK-NEXT: barrier.red.or %p5, 3, %r2, %p1;
-; CHECK-NEXT: bar.red.or %p6, %r1, 10, %p1;
-; CHECK-NEXT: bar.red.or %p7, 3, 11, %p1;
-; CHECK-NEXT: barrier.red.or %p8, %r1, 12, %p1;
-; CHECK-NEXT: barrier.red.or %p9, 3, 13, %p1;
+; CHECK-NEXT: bar.red.or.pred %p2, %r1, %r2, %p1;
+; CHECK-NEXT: bar.red.or.pred %p3, 3, %r2, %p1;
+; CHECK-NEXT: barrier.red.or.pred %p4, %r1, %r2, %p1;
+; CHECK-NEXT: barrier.red.or.pred %p5, 3, %r2, %p1;
+; CHECK-NEXT: bar.red.or.pred %p6, %r1, 64, %p1;
+; CHECK-NEXT: bar.red.or.pred %p7, 3, 32, %p1;
+; CHECK-NEXT: barrier.red.or.pred %p8, %r1, 64, %p1;
+; CHECK-NEXT: barrier.red.or.pred %p9, 3, 64, %p1;
; CHECK-NEXT: ret;
%v1 = call i1 @llvm.nvvm.barrier.cta.red.or.aligned.count(i32 %id, i32 %cnt, i1 %pred)
%v2 = call i1 @llvm.nvvm.barrier.cta.red.or.aligned.count(i32 3, i32 %cnt, i1 %pred)
%v3 = call i1 @llvm.nvvm.barrier.cta.red.or.count(i32 %id, i32 %cnt, i1 %pred)
%v4 = call i1 @llvm.nvvm.barrier.cta.red.or.count(i32 3, i32 %cnt, i1 %pred)
- %v5 = call i1 @llvm.nvvm.barrier.cta.red.or.aligned.count(i32 %id, i32 10, i1 %pred)
- %v6 = call i1 @llvm.nvvm.barrier.cta.red.or.aligned.count(i32 3, i32 11, i1 %pred)
- %v7 = call i1 @llvm.nvvm.barrier.cta.red.or.count(i32 %id, i32 12, i1 %pred)
- %v8 = call i1 @llvm.nvvm.barrier.cta.red.or.count(i32 3, i32 13, i1 %pred)
+ %v5 = call i1 @llvm.nvvm.barrier.cta.red.or.aligned.count(i32 %id, i32 64, i1 %pred)
+ %v6 = call i1 @llvm.nvvm.barrier.cta.red.or.aligned.count(i32 3, i32 32, i1 %pred)
+ %v7 = call i1 @llvm.nvvm.barrier.cta.red.or.count(i32 %id, i32 64, i1 %pred)
+ %v8 = call i1 @llvm.nvvm.barrier.cta.red.or.count(i32 3, i32 64, i1 %pred)
ret void
}
|
schwarzschild-radius
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, Thanks!
Wolfram70
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/51/builds/28987 Here is the relevant piece of the build log for reference: |
Fixes an oversight in the PTX instruction names used in #172541.