Skip to content

Commit 35a2b60

Browse files
authored
[SPIRV][HLSL] Add lowering of rsqrt to SPIRV (#95849)
Add lowering of `rsqrt` to SPIRV. Fixes #88949
1 parent 30efdce commit 35a2b60

File tree

6 files changed, with 164 additions and 41 deletions.

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18331,8 +18331,8 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
1833118331
if (!E->getArg(0)->getType()->hasFloatingRepresentation())
1833218332
llvm_unreachable("rsqrt operand must have a float representation");
1833318333
return Builder.CreateIntrinsic(
18334-
/*ReturnType=*/Op0->getType(), Intrinsic::dx_rsqrt,
18335-
ArrayRef<Value *>{Op0}, nullptr, "dx.rsqrt");
18334+
/*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getRsqrtIntrinsic(),
18335+
ArrayRef<Value *>{Op0}, nullptr, "hlsl.rsqrt");
1833618336
}
1833718337
case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
1833818338
return EmitRuntimeCall(CGM.CreateRuntimeFunction(

clang/lib/CodeGen/CGHLSLRuntime.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ class CGHLSLRuntime {
7575
GENERATE_HLSL_INTRINSIC_FUNCTION(All, all)
7676
GENERATE_HLSL_INTRINSIC_FUNCTION(Any, any)
7777
GENERATE_HLSL_INTRINSIC_FUNCTION(Lerp, lerp)
78+
GENERATE_HLSL_INTRINSIC_FUNCTION(Rsqrt, rsqrt)
7879
GENERATE_HLSL_INTRINSIC_FUNCTION(ThreadId, thread_id)
7980

8081
//===----------------------------------------------------------------------===//
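
clang/test/CodeGenHLSL/builtins/rsqrt.hlsl (file path missing from the capture; presumably this HLSL rsqrt CodeGen test — confirm against the commit)

Lines changed: 70 additions & 39 deletions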
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,84 @@
11
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
22
// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
3-
// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
4-
// RUN: --check-prefixes=CHECK,NATIVE_HALF
3+
// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
4+
// RUN: --check-prefixes=CHECK,DXIL_CHECK,DXIL_NATIVE_HALF,NATIVE_HALF
55
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
66
// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
7-
// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
7+
// RUN: -o - | FileCheck %s --check-prefixes=CHECK,DXIL_CHECK,NO_HALF,DXIL_NO_HALF
8+
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
9+
// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \
10+
// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
11+
// RUN: --check-prefixes=CHECK,SPIR_CHECK,NATIVE_HALF,SPIR_NATIVE_HALF
12+
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
13+
// RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
14+
// RUN: -o - | FileCheck %s --check-prefixes=CHECK,SPIR_CHECK,NO_HALF,SPIR_NO_HALF
815

9-
// NATIVE_HALF: define noundef half @
10-
// NATIVE_HALF: %dx.rsqrt = call half @llvm.dx.rsqrt.f16(
11-
// NATIVE_HALF: ret half %dx.rsqrt
12-
// NO_HALF: define noundef float @"?test_rsqrt_half@@YA$halff@$halff@@Z"(
13-
// NO_HALF: %dx.rsqrt = call float @llvm.dx.rsqrt.f32(
14-
// NO_HALF: ret float %dx.rsqrt
16+
// DXIL_NATIVE_HALF: define noundef half @
17+
// SPIR_NATIVE_HALF: define spir_func noundef half @
18+
// DXIL_NATIVE_HALF: %hlsl.rsqrt = call half @llvm.dx.rsqrt.f16(
19+
// SPIR_NATIVE_HALF: %hlsl.rsqrt = call half @llvm.spv.rsqrt.f16(
20+
// NATIVE_HALF: ret half %hlsl.rsqrt
21+
// DXIL_NO_HALF: define noundef float @
22+
// SPIR_NO_HALF: define spir_func noundef float @
23+
// DXIL_NO_HALF: %hlsl.rsqrt = call float @llvm.dx.rsqrt.f32(
24+
// SPIR_NO_HALF: %hlsl.rsqrt = call float @llvm.spv.rsqrt.f32(
25+
// NO_HALF: ret float %hlsl.rsqrt
1526
half test_rsqrt_half(half p0) { return rsqrt(p0); }
16-
// NATIVE_HALF: define noundef <2 x half> @
17-
// NATIVE_HALF: %dx.rsqrt = call <2 x half> @llvm.dx.rsqrt.v2f16
18-
// NATIVE_HALF: ret <2 x half> %dx.rsqrt
19-
// NO_HALF: define noundef <2 x float> @
20-
// NO_HALF: %dx.rsqrt = call <2 x float> @llvm.dx.rsqrt.v2f32(
21-
// NO_HALF: ret <2 x float> %dx.rsqrt
27+
// DXIL_NATIVE_HALF: define noundef <2 x half> @
28+
// SPIR_NATIVE_HALF: define spir_func noundef <2 x half> @
29+
// DXIL_NATIVE_HALF: %hlsl.rsqrt = call <2 x half> @llvm.dx.rsqrt.v2f16
30+
// SPIR_NATIVE_HALF: %hlsl.rsqrt = call <2 x half> @llvm.spv.rsqrt.v2f16
31+
// NATIVE_HALF: ret <2 x half> %hlsl.rsqrt
32+
// DXIL_NO_HALF: define noundef <2 x float> @
33+
// SPIR_NO_HALF: define spir_func noundef <2 x float> @
34+
// DXIL_NO_HALF: %hlsl.rsqrt = call <2 x float> @llvm.dx.rsqrt.v2f32(
35+
// SPIR_NO_HALF: %hlsl.rsqrt = call <2 x float> @llvm.spv.rsqrt.v2f32(
36+
// NO_HALF: ret <2 x float> %hlsl.rsqrt
2237
half2 test_rsqrt_half2(half2 p0) { return rsqrt(p0); }
23-
// NATIVE_HALF: define noundef <3 x half> @
24-
// NATIVE_HALF: %dx.rsqrt = call <3 x half> @llvm.dx.rsqrt.v3f16
25-
// NATIVE_HALF: ret <3 x half> %dx.rsqrt
26-
// NO_HALF: define noundef <3 x float> @
27-
// NO_HALF: %dx.rsqrt = call <3 x float> @llvm.dx.rsqrt.v3f32(
28-
// NO_HALF: ret <3 x float> %dx.rsqrt
38+
// DXIL_NATIVE_HALF: define noundef <3 x half> @
39+
// SPIR_NATIVE_HALF: define spir_func noundef <3 x half> @
40+
// DXIL_NATIVE_HALF: %hlsl.rsqrt = call <3 x half> @llvm.dx.rsqrt.v3f16
41+
// SPIR_NATIVE_HALF: %hlsl.rsqrt = call <3 x half> @llvm.spv.rsqrt.v3f16
42+
// NATIVE_HALF: ret <3 x half> %hlsl.rsqrt
43+
// DXIL_NO_HALF: define noundef <3 x float> @
44+
// SPIR_NO_HALF: define spir_func noundef <3 x float> @
45+
// DXIL_NO_HALF: %hlsl.rsqrt = call <3 x float> @llvm.dx.rsqrt.v3f32(
46+
// SPIR_NO_HALF: %hlsl.rsqrt = call <3 x float> @llvm.spv.rsqrt.v3f32(
47+
// NO_HALF: ret <3 x float> %hlsl.rsqrt
2948
half3 test_rsqrt_half3(half3 p0) { return rsqrt(p0); }
30-
// NATIVE_HALF: define noundef <4 x half> @
31-
// NATIVE_HALF: %dx.rsqrt = call <4 x half> @llvm.dx.rsqrt.v4f16
32-
// NATIVE_HALF: ret <4 x half> %dx.rsqrt
33-
// NO_HALF: define noundef <4 x float> @
34-
// NO_HALF: %dx.rsqrt = call <4 x float> @llvm.dx.rsqrt.v4f32(
35-
// NO_HALF: ret <4 x float> %dx.rsqrt
49+
// DXIL_NATIVE_HALF: define noundef <4 x half> @
50+
// SPIR_NATIVE_HALF: define spir_func noundef <4 x half> @
51+
// DXIL_NATIVE_HALF: %hlsl.rsqrt = call <4 x half> @llvm.dx.rsqrt.v4f16
52+
// SPIR_NATIVE_HALF: %hlsl.rsqrt = call <4 x half> @llvm.spv.rsqrt.v4f16
53+
// NATIVE_HALF: ret <4 x half> %hlsl.rsqrt
54+
// DXIL_NO_HALF: define noundef <4 x float> @
55+
// SPIR_NO_HALF: define spir_func noundef <4 x float> @
56+
// DXIL_NO_HALF: %hlsl.rsqrt = call <4 x float> @llvm.dx.rsqrt.v4f32(
57+
// SPIR_NO_HALF: %hlsl.rsqrt = call <4 x float> @llvm.spv.rsqrt.v4f32(
58+
// NO_HALF: ret <4 x float> %hlsl.rsqrt
3659
half4 test_rsqrt_half4(half4 p0) { return rsqrt(p0); }
3760

38-
// CHECK: define noundef float @
39-
// CHECK: %dx.rsqrt = call float @llvm.dx.rsqrt.f32(
40-
// CHECK: ret float %dx.rsqrt
61+
// DXIL_CHECK: define noundef float @
62+
// SPIR_CHECK: define spir_func noundef float @
63+
// DXIL_CHECK: %hlsl.rsqrt = call float @llvm.dx.rsqrt.f32(
64+
// SPIR_CHECK: %hlsl.rsqrt = call float @llvm.spv.rsqrt.f32(
65+
// CHECK: ret float %hlsl.rsqrt
4166
float test_rsqrt_float(float p0) { return rsqrt(p0); }
42-
// CHECK: define noundef <2 x float> @
43-
// CHECK: %dx.rsqrt = call <2 x float> @llvm.dx.rsqrt.v2f32
44-
// CHECK: ret <2 x float> %dx.rsqrt
67+
// DXIL_CHECK: define noundef <2 x float> @
68+
// SPIR_CHECK: define spir_func noundef <2 x float> @
69+
// DXIL_CHECK: %hlsl.rsqrt = call <2 x float> @llvm.dx.rsqrt.v2f32
70+
// SPIR_CHECK: %hlsl.rsqrt = call <2 x float> @llvm.spv.rsqrt.v2f32
71+
// CHECK: ret <2 x float> %hlsl.rsqrt
4572
float2 test_rsqrt_float2(float2 p0) { return rsqrt(p0); }
46-
// CHECK: define noundef <3 x float> @
47-
// CHECK: %dx.rsqrt = call <3 x float> @llvm.dx.rsqrt.v3f32
48-
// CHECK: ret <3 x float> %dx.rsqrt
73+
// DXIL_CHECK: define noundef <3 x float> @
74+
// SPIR_CHECK: define spir_func noundef <3 x float> @
75+
// DXIL_CHECK: %hlsl.rsqrt = call <3 x float> @llvm.dx.rsqrt.v3f32
76+
// SPIR_CHECK: %hlsl.rsqrt = call <3 x float> @llvm.spv.rsqrt.v3f32
77+
// CHECK: ret <3 x float> %hlsl.rsqrt
4978
float3 test_rsqrt_float3(float3 p0) { return rsqrt(p0); }
50-
// CHECK: define noundef <4 x float> @
51-
// CHECK: %dx.rsqrt = call <4 x float> @llvm.dx.rsqrt.v4f32
52-
// CHECK: ret <4 x float> %dx.rsqrt
79+
// DXIL_CHECK: define noundef <4 x float> @
80+
// SPIR_CHECK: define spir_func noundef <4 x float> @
81+
// DXIL_CHECK: %hlsl.rsqrt = call <4 x float> @llvm.dx.rsqrt.v4f32
82+
// SPIR_CHECK: %hlsl.rsqrt = call <4 x float> @llvm.spv.rsqrt.v4f32
83+
// CHECK: ret <4 x float> %hlsl.rsqrt
5384
float4 test_rsqrt_float4(float4 p0) { return rsqrt(p0); }

llvm/include/llvm/IR/IntrinsicsSPIRV.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,4 +62,5 @@ let TargetPrefix = "spv" in {
6262
def int_spv_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty]>;
6363
def int_spv_lerp : Intrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>,LLVMMatchType<0>],
6464
[IntrNoMem, IntrWillReturn] >;
65+
def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]>;
6566
}

llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,9 @@ class SPIRVInstructionSelector : public InstructionSelector {
173173
bool selectFmix(Register ResVReg, const SPIRVType *ResType,
174174
MachineInstr &I) const;
175175

176+
bool selectRsqrt(Register ResVReg, const SPIRVType *ResType,
177+
MachineInstr &I) const;
178+
176179
void renderImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
177180
int OpIdx) const;
178181
void renderFImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
@@ -1315,6 +1318,23 @@ bool SPIRVInstructionSelector::selectFmix(Register ResVReg,
13151318
.constrainAllUses(TII, TRI, RBI);
13161319
}
13171320

1321+
bool SPIRVInstructionSelector::selectRsqrt(Register ResVReg,
1322+
const SPIRVType *ResType,
1323+
MachineInstr &I) const {
1324+
1325+
assert(I.getNumOperands() == 3);
1326+
assert(I.getOperand(2).isReg());
1327+
MachineBasicBlock &BB = *I.getParent();
1328+
1329+
return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpExtInst))
1330+
.addDef(ResVReg)
1331+
.addUse(GR.getSPIRVTypeID(ResType))
1332+
.addImm(static_cast<uint32_t>(SPIRV::InstructionSet::GLSL_std_450))
1333+
.addImm(GL::InverseSqrt)
1334+
.addUse(I.getOperand(2).getReg())
1335+
.constrainAllUses(TII, TRI, RBI);
1336+
}
1337+
13181338
bool SPIRVInstructionSelector::selectBitreverse(Register ResVReg,
13191339
const SPIRVType *ResType,
13201340
MachineInstr &I) const {
@@ -1992,6 +2012,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
19922012
return selectAny(ResVReg, ResType, I);
19932013
case Intrinsic::spv_lerp:
19942014
return selectFmix(ResVReg, ResType, I);
2015+
case Intrinsic::spv_rsqrt:
2016+
return selectRsqrt(ResVReg, ResType, I);
19952017
case Intrinsic::spv_lifetime_start:
19962018
case Intrinsic::spv_lifetime_end: {
19972019
unsigned Op = IID == Intrinsic::spv_lifetime_start ? SPIRV::OpLifetimeStart
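
llvm/test/CodeGen/SPIRV/hlsl-intrinsics/rsqrt.ll (file path missing from the capture; presumably this new SPIR-V llc test — confirm against the commit)

Lines changed: 68 additions & 0 deletions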
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
2+
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
3+
4+
; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450"
5+
6+
; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32
7+
; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16
8+
; CHECK-DAG: %[[#float_64:]] = OpTypeFloat 64
9+
10+
; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4
11+
; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4
12+
; CHECK-DAG: %[[#vec4_float_64:]] = OpTypeVector %[[#float_64]] 4
13+
14+
define noundef float @rsqrt_float(float noundef %a) {
15+
entry:
16+
; CHECK: %[[#float_32_arg:]] = OpFunctionParameter %[[#float_32]]
17+
; CHECK: %[[#]] = OpExtInst %[[#float_32]] %[[#op_ext_glsl]] InverseSqrt %[[#float_32_arg]]
18+
%elt.rsqrt = call float @llvm.spv.rsqrt.f32(float %a)
19+
ret float %elt.rsqrt
20+
}
21+
22+
define noundef half @rsqrt_half(half noundef %a) {
23+
entry:
24+
; CHECK: %[[#float_16_arg:]] = OpFunctionParameter %[[#float_16]]
25+
; CHECK: %[[#]] = OpExtInst %[[#float_16]] %[[#op_ext_glsl]] InverseSqrt %[[#float_16_arg]]
26+
%elt.rsqrt = call half @llvm.spv.rsqrt.f16(half %a)
27+
ret half %elt.rsqrt
28+
}
29+
30+
define noundef double @rsqrt_double(double noundef %a) {
31+
entry:
32+
; CHECK: %[[#float_64_arg:]] = OpFunctionParameter %[[#float_64]]
33+
; CHECK: %[[#]] = OpExtInst %[[#float_64]] %[[#op_ext_glsl]] InverseSqrt %[[#float_64_arg]]
34+
%elt.rsqrt = call double @llvm.spv.rsqrt.f64(double %a)
35+
ret double %elt.rsqrt
36+
}
37+
38+
define noundef <4 x float> @rsqrt_float_vector(<4 x float> noundef %a) {
39+
entry:
40+
; CHECK: %[[#vec4_float_32_arg:]] = OpFunctionParameter %[[#vec4_float_32]]
41+
; CHECK: %[[#]] = OpExtInst %[[#vec4_float_32]] %[[#op_ext_glsl]] InverseSqrt %[[#vec4_float_32_arg]]
42+
%elt.rsqrt = call <4 x float> @llvm.spv.rsqrt.v4f32(<4 x float> %a)
43+
ret <4 x float> %elt.rsqrt
44+
}
45+
46+
define noundef <4 x half> @rsqrt_half_vector(<4 x half> noundef %a) {
47+
entry:
48+
; CHECK: %[[#vec4_float_16_arg:]] = OpFunctionParameter %[[#vec4_float_16]]
49+
; CHECK: %[[#]] = OpExtInst %[[#vec4_float_16]] %[[#op_ext_glsl]] InverseSqrt %[[#vec4_float_16_arg]]
50+
%elt.rsqrt = call <4 x half> @llvm.spv.rsqrt.v4f16(<4 x half> %a)
51+
ret <4 x half> %elt.rsqrt
52+
}
53+
54+
define noundef <4 x double> @rsqrt_double_vector(<4 x double> noundef %a) {
55+
entry:
56+
; CHECK: %[[#vec4_float_64_arg:]] = OpFunctionParameter %[[#vec4_float_64]]
57+
; CHECK: %[[#]] = OpExtInst %[[#vec4_float_64]] %[[#op_ext_glsl]] InverseSqrt %[[#vec4_float_64_arg]]
58+
%elt.rsqrt = call <4 x double> @llvm.spv.rsqrt.v4f64(<4 x double> %a)
59+
ret <4 x double> %elt.rsqrt
60+
}
61+
62+
declare half @llvm.spv.rsqrt.f16(half)
63+
declare float @llvm.spv.rsqrt.f32(float)
64+
declare double @llvm.spv.rsqrt.f64(double)
65+
66+
declare <4 x float> @llvm.spv.rsqrt.v4f32(<4 x float>)
67+
declare <4 x half> @llvm.spv.rsqrt.v4f16(<4 x half>)
68+
declare <4 x double> @llvm.spv.rsqrt.v4f64(<4 x double>)

0 commit comments

Comments
 (0)