Skip to content

Commit 97b7f11

Browse files
committed
[Arm64EC] Add support for half
`f16` is passed and returned in vector registers on both x86 and AArch64, the same calling convention as `f32`, so it is a straightforward type to support. The calling convention support already exists, added as part of a6065f0 ("Arm64EC entry/exit thunks, consolidated. (#79067)"). Thus, add mangling and remove the error in order to make `half` work. MSVC does not yet support `_Float16`, so for now this will remain an LLVM-only extension.
1 parent 7f0e407 commit 97b7f11

File tree

7 files changed

+99
-50
lines changed

7 files changed

+99
-50
lines changed

llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,11 @@ ThunkArgInfo AArch64Arm64ECCallLowering::canonicalizeThunkType(
316316
ThunkArgTranslation::PointerIndirection};
317317
};
318318

319+
if (T->isHalfTy()) {
320+
Out << "h";
321+
return direct(T);
322+
}
323+
319324
if (T->isFloatTy()) {
320325
Out << "f";
321326
return direct(T);
@@ -327,8 +332,8 @@ ThunkArgInfo AArch64Arm64ECCallLowering::canonicalizeThunkType(
327332
}
328333

329334
if (T->isFloatingPointTy()) {
330-
report_fatal_error(
331-
"Only 32 and 64 bit floating points are supported for ARM64EC thunks");
335+
report_fatal_error("Only 16, 32, and 64 bit floating points are supported "
336+
"for ARM64EC thunks");
332337
}
333338

334339
auto &DL = M->getDataLayout();
@@ -342,8 +347,15 @@ ThunkArgInfo AArch64Arm64ECCallLowering::canonicalizeThunkType(
342347
uint64_t ElementCnt = T->getArrayNumElements();
343348
uint64_t ElementSizePerBytes = DL.getTypeSizeInBits(ElementTy) / 8;
344349
uint64_t TotalSizeBytes = ElementCnt * ElementSizePerBytes;
345-
if (ElementTy->isFloatTy() || ElementTy->isDoubleTy()) {
346-
Out << (ElementTy->isFloatTy() ? "F" : "D") << TotalSizeBytes;
350+
if (ElementTy->isHalfTy() || ElementTy->isFloatTy() ||
351+
ElementTy->isDoubleTy()) {
352+
if (ElementTy->isHalfTy())
353+
Out << "H";
354+
else if (ElementTy->isFloatTy())
355+
Out << "F";
356+
else if (ElementTy->isDoubleTy())
357+
Out << "D";
358+
Out << TotalSizeBytes;
347359
if (Alignment.value() >= 16 && !Ret)
348360
Out << "a" << Alignment.value();
349361
if (TotalSizeBytes <= 8) {
@@ -355,8 +367,9 @@ ThunkArgInfo AArch64Arm64ECCallLowering::canonicalizeThunkType(
355367
return pointerIndirection(T);
356368
}
357369
} else if (T->isFloatingPointTy()) {
358-
report_fatal_error("Only 32 and 64 bit floating points are supported for "
359-
"ARM64EC thunks");
370+
report_fatal_error(
371+
"Only 16, 32, and 64 bit floating points are supported "
372+
"for ARM64EC thunks");
360373
}
361374
}
362375

llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -85,10 +85,10 @@ define i64 @simple_integers(i8, i16, i32, i64) nounwind {
8585
ret i64 0
8686
}
8787

88-
; NOTE: Only float and double are supported.
89-
define double @simple_floats(float, double) nounwind {
90-
; CHECK-LABEL: .def $ientry_thunk$cdecl$d$fd;
91-
; CHECK: .section .wowthk$aa,"xr",discard,$ientry_thunk$cdecl$d$fd
88+
; NOTE: Only half, float, and double are supported.
89+
define double @simple_floats(half, float, double) nounwind {
90+
; CHECK-LABEL: .def $ientry_thunk$cdecl$d$hfd;
91+
; CHECK: .section .wowthk$aa,"xr",discard,$ientry_thunk$cdecl$d$hfd
9292
; CHECK: // %bb.0:
9393
; CHECK-NEXT: stp q6, q7, [sp, #-176]! // 32-byte Folded Spill
9494
; CHECK-NEXT: .seh_save_any_reg_px q6, 176
@@ -600,7 +600,7 @@ start:
600600
; CHECK-NEXT: .symidx $ientry_thunk$cdecl$i8$i8i8i8i8
601601
; CHECK-NEXT: .word 1
602602
; CHECK-NEXT: .symidx "#simple_floats"
603-
; CHECK-NEXT: .symidx $ientry_thunk$cdecl$d$fd
603+
; CHECK-NEXT: .symidx $ientry_thunk$cdecl$d$hfd
604604
; CHECK-NEXT: .word 1
605605
; CHECK-NEXT: .symidx "#has_varargs"
606606
; CHECK-NEXT: .symidx $ientry_thunk$cdecl$v$varargs

llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll

Lines changed: 39 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -93,10 +93,10 @@ declare i64 @simple_integers(i8, i16, i32, i64) nounwind;
9393
; CHECK-NEXT: .seh_endfunclet
9494
; CHECK-NEXT: .seh_endproc
9595

96-
; NOTE: Only float and double are supported.
97-
declare double @simple_floats(float, double) nounwind;
98-
; CHECK-LABEL: .def $iexit_thunk$cdecl$d$fd;
99-
; CHECK: .section .wowthk$aa,"xr",discard,$iexit_thunk$cdecl$d$fd
96+
; NOTE: Only half, float, and double are supported.
97+
declare double @simple_floats(half, float, double) nounwind;
98+
; CHECK-LABEL: .def $iexit_thunk$cdecl$d$hfd;
99+
; CHECK: .section .wowthk$aa,"xr",discard,$iexit_thunk$cdecl$d$hfd
100100
; CHECK: // %bb.0:
101101
; CHECK-NEXT: sub sp, sp, #48
102102
; CHECK-NEXT: .seh_stackalloc 48
@@ -129,8 +129,8 @@ declare double @simple_floats(float, double) nounwind;
129129
; CHECK-NEXT: adrp x11, simple_floats
130130
; CHECK-NEXT: add x11, x11, :lo12:simple_floats
131131
; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_check_icall]
132-
; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$d$fd
133-
; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$d$fd
132+
; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$d$hfd
133+
; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$d$hfd
134134
; CHECK-NEXT: blr x8
135135
; CHECK-NEXT: .seh_startepilogue
136136
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
@@ -282,33 +282,36 @@ declare void @has_aligned_sret(ptr align 32 sret(%TSRet)) nounwind;
282282
; CHECK: .seh_endfunclet
283283
; CHECK: .seh_endproc
284284

285-
declare [2 x i8] @small_array([2 x i8], [2 x float]) nounwind;
286-
; CHECK-LABEL: .def $iexit_thunk$cdecl$m2$m2F8;
287-
; CHECK: .section .wowthk$aa,"xr",discard,$iexit_thunk$cdecl$m2$m2F8
285+
declare [2 x i8] @small_array([2 x i8], [2 x half], [2 x float]) nounwind;
286+
; CHECK-LABEL: .def $iexit_thunk$cdecl$m2$m2H4F8;
287+
; CHECK: .section .wowthk$aa,"xr",discard,$iexit_thunk$cdecl$m2$m2H4F8
288288
; CHECK: // %bb.0:
289-
; CHECK-NEXT: sub sp, sp, #64
290-
; CHECK-NEXT: .seh_stackalloc 64
291-
; CHECK-NEXT: stp x29, x30, [sp, #48] // 16-byte Folded Spill
292-
; CHECK-NEXT: .seh_save_fplr 48
293-
; CHECK-NEXT: add x29, sp, #48
294-
; CHECK-NEXT: .seh_add_fp 48
289+
; CHECK-NEXT: sub sp, sp, #80
290+
; CHECK-NEXT: .seh_stackalloc 80
291+
; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
292+
; CHECK-NEXT: .seh_save_fplr 64
293+
; CHECK-NEXT: add x29, sp, #64
294+
; CHECK-NEXT: .seh_add_fp 64
295295
; CHECK-NEXT: .seh_endprologue
296-
; CHECK-NEXT: sturb w1, [x29, #-1]
297-
; CHECK-NEXT: adrp x8, __os_arm64x_dispatch_call_no_redirect
298-
; CHECK-NEXT: sturb w0, [x29, #-2]
299-
; CHECK-NEXT: ldr x16, [x8, :lo12:__os_arm64x_dispatch_call_no_redirect]
300-
; CHECK-NEXT: stp s0, s1, [x29, #-12]
301-
; CHECK-NEXT: ldurh w0, [x29, #-2]
302-
; CHECK-NEXT: ldur x1, [x29, #-12]
303-
; CHECK-NEXT: blr x16
304-
; CHECK-NEXT: mov w0, w8
305-
; CHECK-NEXT: sturh w8, [x29, #-14]
306-
; CHECK-NEXT: ubfx w1, w8, #8, #8
296+
; CHECK-NEXT: sturb w0, [x29, #-2]
297+
; CHECK-NEXT: adrp x8, __os_arm64x_dispatch_call_no_redirect
298+
; CHECK-NEXT: sturb w1, [x29, #-1]
299+
; CHECK-NEXT: ldr x16, [x8, :lo12:__os_arm64x_dispatch_call_no_redirect]
300+
; CHECK-NEXT: stur h0, [x29, #-6]
301+
; CHECK-NEXT: ldurh w0, [x29, #-2]
302+
; CHECK-NEXT: stur h1, [x29, #-4]
303+
; CHECK-NEXT: stp s2, s3, [x29, #-16]
304+
; CHECK-NEXT: ldur w1, [x29, #-6]
305+
; CHECK-NEXT: ldur x2, [x29, #-16]
306+
; CHECK-NEXT: blr x16
307+
; CHECK-NEXT: mov w0, w8
308+
; CHECK-NEXT: sturh w8, [x29, #-18]
309+
; CHECK-NEXT: ubfx w1, w8, #8, #8
307310
; CHECK-NEXT: .seh_startepilogue
308-
; CHECK-NEXT: ldp x29, x30, [sp, #48] // 16-byte Folded Reload
309-
; CHECK-NEXT: .seh_save_fplr 48
310-
; CHECK-NEXT: add sp, sp, #64
311-
; CHECK-NEXT: .seh_stackalloc 64
311+
; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
312+
; CHECK-NEXT: .seh_save_fplr 64
313+
; CHECK-NEXT: add sp, sp, #80
314+
; CHECK-NEXT: .seh_stackalloc 80
312315
; CHECK-NEXT: .seh_endepilogue
313316
; CHECK-NEXT: ret
314317
; CHECK-NEXT: .seh_endfunclet
@@ -325,8 +328,8 @@ declare [2 x i8] @small_array([2 x i8], [2 x float]) nounwind;
325328
; CHECK-NEXT: adrp x11, small_array
326329
; CHECK-NEXT: add x11, x11, :lo12:small_array
327330
; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_check_icall]
328-
; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$m2$m2F8
329-
; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$m2$m2F8
331+
; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$m2$m2H4F8
332+
; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$m2$m2H4F8
330333
; CHECK-NEXT: blr x8
331334
; CHECK-NEXT: .seh_startepilogue
332335
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
@@ -577,7 +580,7 @@ declare <8 x i16> @large_vector(<8 x i16> %0) nounwind;
577580
; CHECK-NEXT: .symidx simple_integers
578581
; CHECK-NEXT: .word 0
579582
; CHECK-NEXT: .symidx simple_floats
580-
; CHECK-NEXT: .symidx $iexit_thunk$cdecl$d$fd
583+
; CHECK-NEXT: .symidx $iexit_thunk$cdecl$d$hfd
581584
; CHECK-NEXT: .word 4
582585
; CHECK-NEXT: .symidx "#simple_floats$exit_thunk"
583586
; CHECK-NEXT: .symidx simple_floats
@@ -601,7 +604,7 @@ declare <8 x i16> @large_vector(<8 x i16> %0) nounwind;
601604
; CHECK-NEXT: .symidx has_aligned_sret
602605
; CHECK-NEXT: .word 0
603606
; CHECK-NEXT: .symidx small_array
604-
; CHECK-NEXT: .symidx $iexit_thunk$cdecl$m2$m2F8
607+
; CHECK-NEXT: .symidx $iexit_thunk$cdecl$m2$m2H4F8
605608
; CHECK-NEXT: .word 4
606609
; CHECK-NEXT: .symidx "#small_array$exit_thunk"
607610
; CHECK-NEXT: .symidx small_array
@@ -634,14 +637,14 @@ declare <8 x i16> @large_vector(<8 x i16> %0) nounwind;
634637
define void @func_caller() nounwind {
635638
call void @no_op()
636639
call i64 @simple_integers(i8 0, i16 0, i32 0, i64 0)
637-
call double @simple_floats(float 0.0, double 0.0)
640+
call double @simple_floats(half 0.0, float 0.0, double 0.0)
638641
call void (...) @has_varargs()
639642
%c = alloca i8
640643
call void @has_sret(ptr sret([100 x i8]) %c)
641644
%aligned = alloca %TSRet, align 32
642645
store %TSRet { i64 0, i64 0 }, ptr %aligned, align 32
643646
call void @has_aligned_sret(ptr align 32 sret(%TSRet) %aligned)
644-
call [2 x i8] @small_array([2 x i8] [i8 0, i8 0], [2 x float] [float 0.0, float 0.0])
647+
call [2 x i8] @small_array([2 x i8] [i8 0, i8 0], [2 x half] [half 0.0, half 0.0], [2 x float] [float 0.0, float 0.0])
645648
call [3 x i64] @large_array([3 x i64] [i64 0, i64 0, i64 0], [2 x double] [double 0.0, double 0.0], [2 x [2 x i64]] [[2 x i64] [i64 0, i64 0], [2 x i64] [i64 0, i64 0]])
646649
call %T2 @simple_struct(%T1 { i16 0 }, %T2 { i32 0, float 0.0 }, %T3 { i64 0, double 0.0 }, %T4 { i64 0, double 0.0, i8 0 })
647650
call <4 x i8> @small_vector(<4 x i8> <i8 0, i8 0, i8 0, i8 0>)

llvm/test/CodeGen/AArch64/frexp-arm64ec.ll

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,15 @@
22

33
; Separate from llvm-frexp.ll test because this errors on half cases
44

5+
; ARM64EC-LABEL: test_frexp_f16_i32
6+
; ARM64EC: fcvt d0, h0
7+
; ARM64EC: bl "#frexp"
8+
; ARM64EC: fcvt h0, d0
9+
define { half, i32 } @test_frexp_f16_i32(half %a) {
10+
%result = call { half, i32 } @llvm.frexp.f16.i32(half %a)
11+
ret { half, i32 } %result
12+
}
13+
514
; ARM64EC-LABEL: test_frexp_f32_i32
615
; ARM64EC: fcvt d0, s0
716
; ARM64EC: bl "#frexp"

llvm/test/CodeGen/AArch64/ldexp-arm64ec.ll

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,15 @@
33

44
; Separate from ldexp.ll test because this errors on half cases
55

6+
; ARM64EC-LABEL: ldexp_f16 =
7+
; ARM64EC: fcvt d0, h0
8+
; ARM64EC: bl "#ldexp"
9+
; ARM64EC: fcvt h0, d0
10+
define half @ldexp_f16(half %val, i32 %a) {
11+
%call = call half @llvm.ldexp.f16(half %val, i32 %a)
12+
ret half %call
13+
}
14+
615
; ARM64EC-LABEL: ldexp_f32 =
716
; ARM64EC: fcvt d0, s0
817
; ARM64EC: bl "#ldexp"

llvm/test/CodeGen/AArch64/powi-arm64ec.ll

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,18 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
22
; RUN: llc -mtriple=arm64ec-windows-msvc < %s | FileCheck -check-prefix=ARM64EC %s
33

4-
declare double @llvm.powi.f64.i32(double, i32)
4+
declare half @llvm.powi.f16.i32(half, i32)
55
declare float @llvm.powi.f32.i32(float, i32)
6+
declare double @llvm.powi.f64.i32(double, i32)
7+
8+
; ARM64EC-LABEL: powi_f16
9+
; ARM64EC: fcvt s0, h0
10+
; ARM64EC: scvtf s1, w0
11+
; ARM64EC: bl "#powf"
12+
define half @powi_f16(half %x, i32 %n) nounwind {
13+
%ret = tail call half @llvm.powi.f16.i32(half %x, i32 %n)
14+
ret half %ret
15+
}
616

717
; ARM64EC-LABEL: powi_f32
818
; ARM64EC: scvtf s1, w0

llvm/test/CodeGen/Generic/half.ll

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,7 @@
77
; RUN: %if aarch64-registered-target %{ llc %s -o - -mtriple=aarch64-apple-darwin | FileCheck %s --check-prefixes=ALL,CHECK %}
88
; RUN: %if aarch64-registered-target %{ llc %s -o - -mtriple=aarch64-pc-windows-msvc | FileCheck %s --check-prefixes=ALL,CHECK %}
99
; RUN: %if aarch64-registered-target %{ llc %s -o - -mtriple=aarch64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %}
10-
; FIXME(#94434) unsupported on arm64ec
11-
; RUN: %if aarch64-registered-target %{ ! llc %s -o - -mtriple=arm64ec-pc-windows-msvc -filetype=null %}
10+
; RUN: %if aarch64-registered-target %{ llc %s -o - -mtriple=arm64ec-pc-windows-msvc | FileCheck %s --check-prefixes=EC,CHECK %}
1211
; RUN: %if amdgpu-registered-target %{ llc %s -o - -mtriple=amdgcn-amd-amdhsa | FileCheck %s --check-prefixes=ALL,CHECK %}
1312
; RUN: %if arc-registered-target %{ llc %s -o - -mtriple=arc-elf | FileCheck %s --check-prefixes=ALL,CHECK %}
1413
; RUN: %if arm-registered-target %{ llc %s -o - -mtriple=arm-unknown-linux-gnueabi | FileCheck %s --check-prefixes=ALL,CHECK %}
@@ -47,6 +46,9 @@
4746
; RUN: %if xcore-registered-target %{ llc %s -o - -mtriple=xcore-unknown-unknown | FileCheck %s --check-prefixes=ALL,CHECK %}
4847
; RUN: %if xtensa-registered-target %{ llc %s -o - -mtriple=xtensa-none-elf | FileCheck %s --check-prefixes=ALL,CHECK %}
4948

49+
; Note that arm64ec labels don't have a `:` so use `EC`, other tests do need the
50+
; `:` so directives with the function names don't get treated as labels.
51+
5052
; Codegen tests don't work the same for graphics targets. Add a dummy directive
5153
; for filecheck, just make sure we don't crash.
5254
; NOCRASH: {{.*}}
@@ -59,6 +61,7 @@
5961

6062
define half @from_bits(i16 %bits) nounwind {
6163
; ALL-LABEL: from_bits:
64+
; EC-LABEL: from_bits
6265
; CHECK-NOT: __extend
6366
; CHECK-NOT: __trunc
6467
; CHECK-NOT: __gnu
@@ -69,6 +72,7 @@ define half @from_bits(i16 %bits) nounwind {
6972

7073
define i16 @to_bits(half %f) nounwind {
7174
; ALL-LABEL: to_bits:
75+
; EC-LABEL: to_bits
7276
; CHECK-NOT: __extend
7377
; CHECK-NOT: __trunc
7478
; CHECK-NOT: __gnu
@@ -82,6 +86,7 @@ define i16 @to_bits(half %f) nounwind {
8286

8387
define half @check_freeze(half %f) nounwind {
8488
; ALL-LABEL: check_freeze:
89+
; EC-LABEL: check_freeze
8590
%t0 = freeze half %f
8691
ret half %t0
8792
}

0 commit comments

Comments
 (0)