Skip to content

Commit 1315014

Browse files
ardbiesheuvel authored and ctmarinas committed
arm64: fpsimd: run kernel mode NEON with softirqs disabled
Kernel mode NEON can be used in task or softirq context, but only in a non-nesting manner, i.e., softirq context is only permitted if the interrupt was not taken at a point where the kernel was using the NEON in task context.

This means all users of kernel mode NEON have to be aware of this limitation, and either need to provide scalar fallbacks that may be much slower (up to 20x for AES instructions) and potentially less safe, or use an asynchronous interface that defers processing to a later time when the NEON is guaranteed to be available.

Given that grabbing and releasing the NEON is cheap, we can relax this restriction, by increasing the granularity of kernel mode NEON code, and always disabling softirq processing while the NEON is being used in task context.

Signed-off-by: Ard Biesheuvel <[email protected]>
Acked-by: Will Deacon <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Catalin Marinas <[email protected]>
1 parent 4c4dcd3 commit 1315014

File tree

8 files changed

+31
-15
lines changed

8 files changed

+31
-15
lines changed

arch/arm64/crypto/aes-modes.S

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -700,7 +700,7 @@ AES_FUNC_START(aes_mac_update)
700700
cbz w5, .Lmacout
701701
encrypt_block v0, w2, x1, x7, w8
702702
st1 {v0.16b}, [x4] /* return dg */
703-
cond_yield .Lmacout, x7
703+
cond_yield .Lmacout, x7, x8
704704
b .Lmacloop4x
705705
.Lmac1x:
706706
add w3, w3, #4

arch/arm64/crypto/sha1-ce-core.S

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ CPU_LE( rev32 v11.16b, v11.16b )
121121
add dgav.4s, dgav.4s, dg0v.4s
122122

123123
cbz w2, 2f
124-
cond_yield 3f, x5
124+
cond_yield 3f, x5, x6
125125
b 0b
126126

127127
/*

arch/arm64/crypto/sha2-ce-core.S

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ CPU_LE( rev32 v19.16b, v19.16b )
129129

130130
/* handled all input blocks? */
131131
cbz w2, 2f
132-
cond_yield 3f, x5
132+
cond_yield 3f, x5, x6
133133
b 0b
134134

135135
/*

arch/arm64/crypto/sha3-ce-core.S

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,11 +184,11 @@ SYM_FUNC_START(sha3_ce_transform)
184184
eor v0.16b, v0.16b, v31.16b
185185

186186
cbnz w8, 3b
187-
cond_yield 3f, x8
187+
cond_yield 4f, x8, x9
188188
cbnz w2, 0b
189189

190190
/* save state */
191-
3: st1 { v0.1d- v3.1d}, [x0], #32
191+
4: st1 { v0.1d- v3.1d}, [x0], #32
192192
st1 { v4.1d- v7.1d}, [x0], #32
193193
st1 { v8.1d-v11.1d}, [x0], #32
194194
st1 {v12.1d-v15.1d}, [x0], #32

arch/arm64/crypto/sha512-ce-core.S

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ CPU_LE( rev64 v19.16b, v19.16b )
195195
add v10.2d, v10.2d, v2.2d
196196
add v11.2d, v11.2d, v3.2d
197197

198-
cond_yield 3f, x4
198+
cond_yield 3f, x4, x5
199199
/* handled all input blocks? */
200200
cbnz w2, 0b
201201

arch/arm64/include/asm/assembler.h

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <asm-generic/export.h>
1616

1717
#include <asm/asm-offsets.h>
18+
#include <asm/alternative.h>
1819
#include <asm/cpufeature.h>
1920
#include <asm/cputype.h>
2021
#include <asm/debug-monitors.h>
@@ -701,19 +702,32 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU
701702
.endm
702703

703704
/*
704-
* Check whether preempt-disabled code should yield as soon as it
705-
* is able. This is the case if re-enabling preemption a single
706-
* time results in a preempt count of zero, and the TIF_NEED_RESCHED
707-
* flag is set. (Note that the latter is stored negated in the
708-
* top word of the thread_info::preempt_count field)
705+
* Check whether preempt/bh-disabled asm code should yield as soon as
706+
* it is able. This is the case if we are currently running in task
707+
* context, and either a softirq is pending, or the TIF_NEED_RESCHED
708+
* flag is set and re-enabling preemption a single time would result in
709+
* a preempt count of zero. (Note that the TIF_NEED_RESCHED flag is
710+
* stored negated in the top word of the thread_info::preempt_count
711+
* field)
709712
*/
710-
.macro cond_yield, lbl:req, tmp:req
711-
#ifdef CONFIG_PREEMPTION
713+
.macro cond_yield, lbl:req, tmp:req, tmp2:req
712714
get_current_task \tmp
713715
ldr \tmp, [\tmp, #TSK_TI_PREEMPT]
716+
/*
717+
* If we are serving a softirq, there is no point in yielding: the
718+
* softirq will not be preempted no matter what we do, so we should
719+
* run to completion as quickly as we can.
720+
*/
721+
tbnz \tmp, #SOFTIRQ_SHIFT, .Lnoyield_\@
722+
#ifdef CONFIG_PREEMPTION
714723
sub \tmp, \tmp, #PREEMPT_DISABLE_OFFSET
715724
cbz \tmp, \lbl
716725
#endif
726+
adr_l \tmp, irq_stat + IRQ_CPUSTAT_SOFTIRQ_PENDING
727+
this_cpu_offset \tmp2
728+
ldr w\tmp, [\tmp, \tmp2]
729+
cbnz w\tmp, \lbl // yield on pending softirq in task context
730+
.Lnoyield_\@:
717731
.endm
718732

719733
/*

arch/arm64/kernel/asm-offsets.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@ int main(void)
9595
DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE);
9696
BLANK();
9797
DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET);
98+
DEFINE(SOFTIRQ_SHIFT, SOFTIRQ_SHIFT);
99+
DEFINE(IRQ_CPUSTAT_SOFTIRQ_PENDING, offsetof(irq_cpustat_t, __softirq_pending));
98100
BLANK();
99101
DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack));
100102
DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task));

arch/arm64/kernel/fpsimd.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ static void __get_cpu_fpsimd_context(void)
180180
*/
181181
static void get_cpu_fpsimd_context(void)
182182
{
183-
preempt_disable();
183+
local_bh_disable();
184184
__get_cpu_fpsimd_context();
185185
}
186186

@@ -201,7 +201,7 @@ static void __put_cpu_fpsimd_context(void)
201201
static void put_cpu_fpsimd_context(void)
202202
{
203203
__put_cpu_fpsimd_context();
204-
preempt_enable();
204+
local_bh_enable();
205205
}
206206

207207
static bool have_cpu_fpsimd_context(void)

0 commit comments

Comments
 (0)