Skip to content

Commit d532641

Browse files
authored
[libclc] Improve __clc_min/max/clamp implementation (#172599)
Implement __clc_clamp in terms of __clc_min/__clc_max, and have the floating-point __clc_min/__clc_max forward to __clc_fmin/__clc_fmax. FP __clc_min/max/clamp now lower to @llvm.minimumnum/@llvm.maximumnum, and integer clamp lowers to @llvm.umin/@llvm.umax. This reduces fcmp+select chains and improves codegen.

Example change to amdgcn--amdhsa.bc:
```
in function _Z5clamphhh:
  > %4 = icmp ugt i8 %0, %2
    %4 = tail call noundef i8 @llvm.umax.i8(i8 %0, i8 %1)
  > %6 = select i1 %4, i8 %2, i8 %5
  > ret i8 %6
  < %5 = tail call noundef i8 @llvm.umin.i8(i8 %2, i8 %4)
  < ret i8 %5
in function _Z5clampddd:
  in block %3 / %3:
    > %4 = fcmp ogt double %0, %2
    > %5 = fcmp olt double %0, %1
    > %6 = select i1 %5, double %1, double %0
    > %7 = select i1 %4, double %2, double %6
    > ret double %7
    < %4 = tail call noundef double @llvm.maximumnum.f64(double %0, double %1)
    < %5 = tail call noundef double @llvm.minimumnum.f64(double %4, double %2)
    < ret double %5
```
1 parent 37a73d5 commit d532641

File tree

6 files changed

+43
-5
lines changed

6 files changed

+43
-5
lines changed

libclc/clc/lib/generic/shared/clc_clamp.cl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
//===----------------------------------------------------------------------===//
88

99
#include <clc/internal/clc.h>
10+
#include <clc/shared/clc_max.h>
11+
#include <clc/shared/clc_min.h>
1012

1113
#define __CLC_BODY <clc_clamp.inc>
1214
#include <clc/integer/gentype.inc>

libclc/clc/lib/generic/shared/clc_clamp.inc

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,13 @@
99
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_clamp(__CLC_GENTYPE x,
1010
__CLC_GENTYPE y,
1111
__CLC_GENTYPE z) {
12-
return (x > z ? z : (x < y ? y : x));
12+
return __clc_min(__clc_max(x, y), z);
1313
}
1414

1515
#ifndef __CLC_SCALAR
1616
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_clamp(__CLC_GENTYPE x,
1717
__CLC_SCALAR_GENTYPE y,
1818
__CLC_SCALAR_GENTYPE z) {
19-
return (x > (__CLC_GENTYPE)z ? (__CLC_GENTYPE)z
20-
: (x < (__CLC_GENTYPE)y ? (__CLC_GENTYPE)y : x));
19+
return __clc_min(__clc_max(x, (__CLC_GENTYPE)y), (__CLC_GENTYPE)z);
2120
}
2221
#endif

libclc/clc/lib/generic/shared/clc_max.cl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
//===----------------------------------------------------------------------===//
88

99
#include <clc/internal/clc.h>
10+
#include <clc/math/clc_fmax.h>
1011

1112
#define __CLC_BODY <clc_max.inc>
1213
#include <clc/integer/gentype.inc>

libclc/clc/lib/generic/shared/clc_max.inc

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,22 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9+
#ifdef __CLC_FPSIZE
10+
11+
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_max(__CLC_GENTYPE a,
12+
__CLC_GENTYPE b) {
13+
return __clc_fmax(a, b);
14+
}
15+
16+
#ifndef __CLC_SCALAR
17+
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_max(__CLC_GENTYPE a,
18+
__CLC_SCALAR_GENTYPE b) {
19+
return __clc_fmax(a, b);
20+
}
21+
#endif // __CLC_SCALAR
22+
23+
#else // __CLC_FPSIZE
24+
925
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_max(__CLC_GENTYPE a,
1026
__CLC_GENTYPE b) {
1127
return (a > b ? a : b);
@@ -16,4 +32,6 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_max(__CLC_GENTYPE a,
1632
__CLC_SCALAR_GENTYPE b) {
1733
return (a > (__CLC_GENTYPE)b ? a : (__CLC_GENTYPE)b);
1834
}
19-
#endif
35+
#endif // __CLC_SCALAR
36+
37+
#endif // __CLC_FPSIZE

libclc/clc/lib/generic/shared/clc_min.cl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
//===----------------------------------------------------------------------===//
88

99
#include <clc/internal/clc.h>
10+
#include <clc/math/clc_fmin.h>
1011

1112
#define __CLC_BODY <clc_min.inc>
1213
#include <clc/integer/gentype.inc>

libclc/clc/lib/generic/shared/clc_min.inc

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,21 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9+
#ifdef __CLC_FPSIZE
10+
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_min(__CLC_GENTYPE a,
11+
__CLC_GENTYPE b) {
12+
return __clc_fmin(a, b);
13+
}
14+
15+
#ifndef __CLC_SCALAR
16+
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_min(__CLC_GENTYPE a,
17+
__CLC_SCALAR_GENTYPE b) {
18+
return __clc_fmin(a, b);
19+
}
20+
#endif // __CLC_SCALAR
21+
22+
#else // __CLC_FPSIZE
23+
924
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_min(__CLC_GENTYPE a,
1025
__CLC_GENTYPE b) {
1126
return (b < a ? b : a);
@@ -16,4 +31,6 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_min(__CLC_GENTYPE a,
1631
__CLC_SCALAR_GENTYPE b) {
1732
return (b < (__CLC_GENTYPE)a ? (__CLC_GENTYPE)b : a);
1833
}
19-
#endif
34+
#endif // __CLC_SCALAR
35+
36+
#endif // __CLC_FPSIZE

0 commit comments

Comments
 (0)