Message ID | 1507660725-7986-20-git-send-email-Dave.Martin@arm.com |
---|---|
State | New |
Headers | show |
Series | ARM Scalable Vector Extension (SVE) | expand |
On Tue, Oct 10, 2017 at 07:38:36PM +0100, Dave P Martin wrote: > @@ -702,6 +737,211 @@ static int system_call_set(struct task_struct *target, > return ret; > } > > +#ifdef CONFIG_ARM64_SVE > + > +static void sve_init_header_from_task(struct user_sve_header *header, > + struct task_struct *target) > +{ > + unsigned int vq; > + > + memset(header, 0, sizeof(*header)); > + > + header->flags = test_tsk_thread_flag(target, TIF_SVE) ? > + SVE_PT_REGS_SVE : SVE_PT_REGS_FPSIMD; For PTRACE_SYSCALL, we may or may not have TIF_SVE depending on what happened with the target. Just a thought: shall we clear TIF_SVE (and sync it to fpsimd) in syscall_trace_enter()? > + if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT)) > + header->flags |= SVE_PT_VL_INHERIT; > + > + header->vl = target->thread.sve_vl; > + vq = sve_vq_from_vl(header->vl); > + > + header->max_vl = sve_max_vl; > + if (WARN_ON(!sve_vl_valid(sve_max_vl))) > + header->max_vl = header->vl; > + > + header->size = SVE_PT_SIZE(vq, header->flags); > + header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl), > + SVE_PT_REGS_SVE); > +} [...] 
> +static int sve_set(struct task_struct *target, > + const struct user_regset *regset, > + unsigned int pos, unsigned int count, > + const void *kbuf, const void __user *ubuf) > +{ > + int ret; > + struct user_sve_header header; > + unsigned int vq; > + unsigned long start, end; > + > + if (!system_supports_sve()) > + return -EINVAL; > + > + /* Header */ > + if (count < sizeof(header)) > + return -EINVAL; > + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &header, > + 0, sizeof(header)); > + if (ret) > + goto out; > + > + /* > + * Apart from PT_SVE_REGS_MASK, all PT_SVE_* flags are consumed by > + * sve_set_vector_length(), which will also validate them for us: > + */ > + ret = sve_set_vector_length(target, header.vl, > + ((unsigned long)header.flags & ~SVE_PT_REGS_MASK) << 16); > + if (ret) > + goto out; > + > + /* Actual VL set may be less than the user asked for: */ > + vq = sve_vq_from_vl(target->thread.sve_vl); > + > + /* Registers: FPSIMD-only case */ > + > + BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header)); > + if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD) { > + sve_sync_to_fpsimd(target); > + > + ret = __fpr_set(target, regset, pos, count, kbuf, ubuf, > + SVE_PT_FPSIMD_OFFSET); > + clear_tsk_thread_flag(target, TIF_SVE); > + goto out; > + } __fpr_set() already calls sve_sync_to_fpsimd(). Anyway, do you actually need this since we are going to override the FPSIMD state anyway here. > + > + /* Otherwise: full SVE case */ > + > + /* > + * If setting a different VL from the requested VL and there is > + * register data, the data layout will be wrong: don't even > + * try to set the registers in this case. > + */ > + if (count && vq != sve_vq_from_vl(header.vl)) { > + ret = -EIO; > + goto out; > + } > + > + sve_alloc(target); > + fpsimd_sync_to_sve(target); Similarly here, it's a full SVE case, so we are going to override it anyway. 
> + set_tsk_thread_flag(target, TIF_SVE); This has the side-effect of enabling TIF_SVE even for PTRACE_SYSCALL which may be cleared in some circumstances. It may not be an issue though.
On Thu, Oct 12, 2017 at 06:06:32PM +0100, Catalin Marinas wrote: > On Tue, Oct 10, 2017 at 07:38:36PM +0100, Dave P Martin wrote: > > @@ -702,6 +737,211 @@ static int system_call_set(struct task_struct *target, > > return ret; > > } > > > > +#ifdef CONFIG_ARM64_SVE > > + > > +static void sve_init_header_from_task(struct user_sve_header *header, > > + struct task_struct *target) > > +{ > > + unsigned int vq; > > + > > + memset(header, 0, sizeof(*header)); > > + > > + header->flags = test_tsk_thread_flag(target, TIF_SVE) ? > > + SVE_PT_REGS_SVE : SVE_PT_REGS_FPSIMD; > > For PTRACE_SYSCALL, we may or may not have TIF_SVE depending on what > happened with the target. Just a thought: shall we clear TIF_SVE (and > sync it to fpsimd) in syscall_trace_enter()? I'm not so sure: if we were to do that, a syscall that is cancelled by writing -1 to REGSET_SYSCALL could still discard the SVE registers as a side-effect. The target committed to discard by executing SVC, but my feeling is that cancellation of a syscall in this way shouldn't have avoidable side-effects for the target. But the semantics of cancelled syscalls are a bit of a grey area, so I can see potential arguments on both sides. The current approach at least saves a bit of code. What do you think? > > + if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT)) > > + header->flags |= SVE_PT_VL_INHERIT; > > + > > + header->vl = target->thread.sve_vl; > > + vq = sve_vq_from_vl(header->vl); > > + > > + header->max_vl = sve_max_vl; > > + if (WARN_ON(!sve_vl_valid(sve_max_vl))) > > + header->max_vl = header->vl; > > + > > + header->size = SVE_PT_SIZE(vq, header->flags); > > + header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl), > > + SVE_PT_REGS_SVE); > > +} > [...] 
> > +static int sve_set(struct task_struct *target, > > + const struct user_regset *regset, > > + unsigned int pos, unsigned int count, > > + const void *kbuf, const void __user *ubuf) > > +{ > > + int ret; > > + struct user_sve_header header; > > + unsigned int vq; > > + unsigned long start, end; > > + > > + if (!system_supports_sve()) > > + return -EINVAL; > > + > > + /* Header */ > > + if (count < sizeof(header)) > > + return -EINVAL; > > + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &header, > > + 0, sizeof(header)); > > + if (ret) > > + goto out; > > + > > + /* > > + * Apart from PT_SVE_REGS_MASK, all PT_SVE_* flags are consumed by > > + * sve_set_vector_length(), which will also validate them for us: > > + */ > > + ret = sve_set_vector_length(target, header.vl, > > + ((unsigned long)header.flags & ~SVE_PT_REGS_MASK) << 16); > > + if (ret) > > + goto out; > > + > > + /* Actual VL set may be less than the user asked for: */ > > + vq = sve_vq_from_vl(target->thread.sve_vl); > > + > > + /* Registers: FPSIMD-only case */ > > + > > + BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header)); > > + if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD) { > > + sve_sync_to_fpsimd(target); > > + > > + ret = __fpr_set(target, regset, pos, count, kbuf, ubuf, > > + SVE_PT_FPSIMD_OFFSET); > > + clear_tsk_thread_flag(target, TIF_SVE); > > + goto out; > > + } > > __fpr_set() already calls sve_sync_to_fpsimd(). Anyway, do you actually Yes, the call to sve_sync_to_fpsimd() is superfluous here -- I think that I realised that all callers of __fpr_set() need this to happen, but never deleted the explicit call from sve_set(). I'll delete it. 
Looking more closely at __fpr_set() though, I think it needs this change too, because the sync is unintentionally placed after reading thread.fpsimd_state instead of before: @@ -652,11 +652,12 @@ static int __fpr_set(struct task_struct *target, unsigned int start_pos) { int ret; - struct user_fpsimd_state newstate = - target->thread.fpsimd_state.user_fpsimd; + struct user_fpsimd_state newstate; sve_sync_to_fpsimd(target); + newstate = target->thread.fpsimd_state.user_fpsimd; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate, [...] (Or were you confident that this was already OK? Maybe I'm confusing myself.) > need this since we are going to override the FPSIMD state anyway here. The underlying reason for this is the issue of what should happen for short regset writes. Historically, writes through fpr_set() can be truncated arbitrarily, and the rest of fpsimd_state will remain unchanged. The issue is that if TIF_SVE is set, fpsimd_state can be stale for target. If the initial sve_sync_to_fpsimd() is removed in sve_set() above, then we may resurrect old values for the untouched registers, instead of simply leaving them unmodified. Should I add comments explaining the purpose? I guess it is rather non-obvious. Of course, I don't know whether userspace should really rely on partial regset writes doing anything sane, but I figured the implemented behaviour is at least less surprising with respect to the fpr_set() behavior. No legacy software can be relying on NT_ARM_SVE at all, so the behaviour here may not matter that much. My idea was to reduce the invasiveness of porting ptrace clients to use NT_ARM_SVE. > > > + > > + /* Otherwise: full SVE case */ > > + > > + /* > > + * If setting a different VL from the requested VL and there is > > + * register data, the data layout will be wrong: don't even > > + * try to set the registers in this case. 
> > + */ > > + if (count && vq != sve_vq_from_vl(header.vl)) { > > + ret = -EIO; > > + goto out; > > + } > > + > > + sve_alloc(target); > > + fpsimd_sync_to_sve(target); > > Similarly here, it's a full SVE case, so we are going to override it > anyway. The argument here is similar: target->sve_state might already be allocated and if so it could contain data that doesn't match the user task's idea of what's in the V-regs. (In fact, sve_alloc() currently zeroes sve_state, but that's still different from the current V-regs.) There is no possibility of legacy software relying on this code path, so we could say that a short write to NT_ARM_SVE with PT_SVE_REGS_SVE in flags zeroes any trailing data instead of preserving it. Either way, I don't intend to document the behaviour of partial writes to NT_ARM_SVE. From a userspace point of view, I leave it unspecified. > > + set_tsk_thread_flag(target, TIF_SVE); > > This has the side-effect of enabling TIF_SVE even for PTRACE_SYSCALL > which may be cleared in some circumstances. It may not be an issue > though. I would argue that this is correct behaviour: the syscall enter trap and exit traps should both behave as if they are outside the syscall, allowing the debugger to simulate the effect of inserting any instructions the target could have inserted before or after the SVC. This may include simulating SVE instructions or modifying SVE regs, which would require TIF_SVE to get set. If the tracer doesn't cancel the syscall at the enter trap, then a real syscall will execute and that may cause the SVE state to be discarded in the usual way in the case of preemption or blocking: it seems cleaner for the debug illusion that this decision isn't made differently just because the target is being traced. (Spoiler alert though: the syscall exit trap will force the target to be scheduled out, which will force discard with the current task_fpsimd_save() behaviour ... 
but that could be changed in the future, and I prefer not to document any sort of guarantee here.) Does this make sense? There may be issues or corner cases here that I didn't spot... Cheers ---Dave
On Fri, Oct 13, 2017 at 05:16:39PM +0100, Dave P Martin wrote: > On Thu, Oct 12, 2017 at 06:06:32PM +0100, Catalin Marinas wrote: > > On Tue, Oct 10, 2017 at 07:38:36PM +0100, Dave P Martin wrote: > > > @@ -702,6 +737,211 @@ static int system_call_set(struct task_struct *target, > > > return ret; > > > } > > > > > > +#ifdef CONFIG_ARM64_SVE > > > + > > > +static void sve_init_header_from_task(struct user_sve_header *header, > > > + struct task_struct *target) > > > +{ > > > + unsigned int vq; > > > + > > > + memset(header, 0, sizeof(*header)); > > > + > > > + header->flags = test_tsk_thread_flag(target, TIF_SVE) ? > > > + SVE_PT_REGS_SVE : SVE_PT_REGS_FPSIMD; > > > > For PTRACE_SYSCALL, we may or may not have TIF_SVE depending on what > > happened with the target. Just a thought: shall we clear TIF_SVE (and > > sync it to fpsimd) in syscall_trace_enter()? > > I'm not so sure: if we were to do that, a syscall that is cancelled by > writing -1 to REGSET_SYSCALL could still discard the SVE registers as a > side-effect. > > The target committed to discard by executing SVC, but my feeling is > that cancellation of a syscall in this way shouldn't have avoidable > side-effects for the target. But the semantics of cancelled syscalls > are a bit of a grey area, so I can see potential arguments on both > sides. We already can't guarantee this - if the task invoking a syscall gets preempted before syscall_trace_enter(), TIF_SVE will be cleared. Are you reinstating TIF_SVE if the syscall was cancelled? So my comment was more about consistency on presenting the SVE state to the debugger handling PTRACE_SYSCALL. 
> > > + /* Registers: FPSIMD-only case */ > > > + > > > + BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header)); > > > + if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD) { > > > + sve_sync_to_fpsimd(target); > > > + > > > + ret = __fpr_set(target, regset, pos, count, kbuf, ubuf, > > > + SVE_PT_FPSIMD_OFFSET); > > > + clear_tsk_thread_flag(target, TIF_SVE); > > > + goto out; > > > + } > > > > __fpr_set() already calls sve_sync_to_fpsimd(). Anyway, do you actually > > Yes, the call to sve_sync_to_fpsimd() is superfluous here -- I think > that I realised that all callers of __fpr_set() need this to happen, > but never deleted the explicit call from sve_set(). > > I'll delete it. > > > Looking more closely at __fpr_set() though, I think it needs this change > too, because the sync is unintentionally placed after reading > thread.fpsimd_state instead of before: > > @@ -652,11 +652,12 @@ static int __fpr_set(struct task_struct *target, > unsigned int start_pos) > { > int ret; > - struct user_fpsimd_state newstate = > - target->thread.fpsimd_state.user_fpsimd; > + struct user_fpsimd_state newstate; > > sve_sync_to_fpsimd(target); > > + newstate = target->thread.fpsimd_state.user_fpsimd; > + > ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate, > [...] > > (Or were you confident that this was already OK? Maybe I'm confusing > myself.) With the sve_sync_to_fpsimd() called before __fpr_set(), it was ok. Once you removed that you indeed need the change to __fpr_set(). > > need this since we are going to override the FPSIMD state anyway here. > > The underlying reason for this is the issue of what should happen > for short regset writes. Historically, writes through fpr_set() can > be truncated arbitrarily, and the rest of fpsimd_state will remain > unchanged. Ah, I forgot about this. > The issue is that if TIF_SVE is set, fpsimd_state can be stale for > target. 
If the initial sve_sync_to_fpsimd() is removed in sve_set() > above, then we may resurrect old values for the untouched registers, > instead of simply leaving them unmodified. > > Should I add comments explaining the purpose? I guess it is rather > non-obvious. Yes, please. > > > + set_tsk_thread_flag(target, TIF_SVE); > > > > This has the side-effect of enabling TIF_SVE even for PTRACE_SYSCALL > > which may be cleared in some circumstances. It may not be an issue > > though. > > I would argue that this is correct behaviour: the syscall enter trap and > exit traps should both behave as if they are outside the syscall, > allowing the debugger to simulate the effect of inserting any > instructions the target could have inserted before or after the SVC. > This may include simulating SVE instructions or modifying SVE regs, > which would require TIF_SVE to get set. I'm ok with this approach but I'm not sure we can guarantee it with preemption enabled. > If the tracer doesn't cancel the syscall at the enter trap, then a > real syscall will execute and that may cause the SVE state to be > discarded in the usual way in the case of preemption or blocking: it > seems cleaner for the debug illusion that this decision isn't made > differently just because the target is being traced. > > (Spoiler alert though: the syscall exit trap will force the target > to be scheduled out, which will force discard with the current > task_fpsimd_save() behaviour ... but that could be changed in the > future, and I prefer not to document any sort of guarantee here.) If such state isn't guaranteed, then the de-facto ABI is that the debugger cannot simulate any SVE instruction via NO_SYSCALL since the SVE state may be discarded. So it can only rely on what's guaranteed and changing the behaviour later won't actually help.
On Wed, Oct 18, 2017 at 11:32:55AM +0100, Catalin Marinas wrote: > On Fri, Oct 13, 2017 at 05:16:39PM +0100, Dave P Martin wrote: > > On Thu, Oct 12, 2017 at 06:06:32PM +0100, Catalin Marinas wrote: > > > On Tue, Oct 10, 2017 at 07:38:36PM +0100, Dave P Martin wrote: > > > > @@ -702,6 +737,211 @@ static int system_call_set(struct task_struct *target, > > > > return ret; > > > > } > > > > > > > > +#ifdef CONFIG_ARM64_SVE > > > > + > > > > +static void sve_init_header_from_task(struct user_sve_header *header, > > > > + struct task_struct *target) > > > > +{ > > > > + unsigned int vq; > > > > + > > > > + memset(header, 0, sizeof(*header)); > > > > + > > > > + header->flags = test_tsk_thread_flag(target, TIF_SVE) ? > > > > + SVE_PT_REGS_SVE : SVE_PT_REGS_FPSIMD; > > > > > > For PTRACE_SYSCALL, we may or may not have TIF_SVE depending on what > > > happened with the target. Just a thought: shall we clear TIF_SVE (and > > > sync it to fpsimd) in syscall_trace_enter()? > > > > I'm not so sure: if we were to do that, a syscall that is cancelled by > > writing -1 to REGSET_SYSCALL could still discard the SVE registers as a > > side-effect. > > > > The target committed to discard by executing SVC, but my feeling is > > that cancellation of a syscall in this way shouldn't have avoidable > > side-effects for the target. But the semantics of cancelled syscalls > > are a bit of a grey area, so I can see potential arguments on both > > sides. > > We already can't guarantee this - if the task invoking a syscall gets > preempted before syscall_trace_enter(), TIF_SVE will be cleared. Are you > reinstating TIF_SVE if the syscall was cancelled? > > So my comment was more about consistency on presenting the SVE state to > the debugger handling PTRACE_SYSCALL. See the end of this reply. 
> > > > + /* Registers: FPSIMD-only case */ > > > > + > > > > + BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header)); > > > > + if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD) { > > > > + sve_sync_to_fpsimd(target); > > > > + > > > > + ret = __fpr_set(target, regset, pos, count, kbuf, ubuf, > > > > + SVE_PT_FPSIMD_OFFSET); > > > > + clear_tsk_thread_flag(target, TIF_SVE); > > > > + goto out; > > > > + } > > > > > > __fpr_set() already calls sve_sync_to_fpsimd(). Anyway, do you actually > > > > Yes, the call to sve_sync_to_fpsimd() is superfluous here -- I think > > that I realised that all callers of __fpr_set() need this to happen, > > but never deleted the explicit call from sve_set(). > > > > I'll delete it. > > > > > > Looking more closely at __fpr_set() though, I think it needs this change > > too, because the sync is unintentionally placed after reading > > thread.fpsimd_state instead of before: > > > > @@ -652,11 +652,12 @@ static int __fpr_set(struct task_struct *target, > > unsigned int start_pos) > > { > > int ret; > > - struct user_fpsimd_state newstate = > > - target->thread.fpsimd_state.user_fpsimd; > > + struct user_fpsimd_state newstate; > > > > sve_sync_to_fpsimd(target); > > > > + newstate = target->thread.fpsimd_state.user_fpsimd; > > + > > ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate, > > [...] > > > > (Or were you confident that this was already OK? Maybe I'm confusing > > myself.) > > With the sve_sync_to_fpsimd() called before __fpr_set(), it was ok. Once > you removed that you indeed need the change to __fpr_set(). Hmmm, yes. Anyway, I've applied the above fix, so I think this should be fine now. > > > need this since we are going to override the FPSIMD state anyway here. > > > > The underlying reason for this is the issue of what should happen > > for short regset writes. Historically, writes through fpr_set() can > > be truncated arbitrarily, and the rest of fpsimd_state will remain > > unchanged. 
> > Ah, I forgot about this. > > > The issue is that if TIF_SVE is set, fpsimd_state can be stale for > > target. If the initial sve_sync_to_fpsimd() is removed in sve_set() > > above, then we may resurrect old values for the untouched registers, > > instead of simply leaving them unmodified. > > > > Should I add comments explaining the purpose? I guess it is rather > > non-obvious. > > Yes, please. Will do. It's pretty yucky. > > > > > + set_tsk_thread_flag(target, TIF_SVE); > > > > > > This has the side-effect of enabling TIF_SVE even for PTRACE_SYSCALL > > > which may be cleared in some circumstances. It may not be an issue > > > though. > > > > I would argue that this is correct behaviour: the syscall enter trap and > > exit traps should both behave as if they are outside the syscall, > > allowing the debugger to simulate the effect of inserting any > > instructions the target could have inserted before or after the SVC. > > This may include simulating SVE instructions or modifying SVE regs, > > which would require TIF_SVE to get set. > > I'm ok with this approach but I'm not sure we can guarantee it with > preemption enabled. Hmmm, I think I'm guilty of inventing a spurious argument here. Apparently gdb does not do syscall cancelling. Strace does though. (See http://man7.org/linux/man-pages/man1/strace.1.html, search for "syscall tampering" ;) However, it never tries to skip a syscall entirely AFAICT: instead, it only attempts to fake what occurs during the syscall, such as bailing out with a predetermined return value. > > If the tracer doesn't cancel the syscall at the enter trap, then a > > real syscall will execute and that may cause the SVE state to be > > discarded in the usual way in the case of preemption or blocking: it > > seems cleaner for the debug illusion that this decision isn't made > > differently just because the target is being traced. 
> > > > (Spoiler alert though: the syscall exit trap will force the target > > to be scheduled out, which will force discard with the current > > task_fpsimd_save() behaviour ... but that could be changed in the > > future, and I prefer not to document any sort of guarantee here.) > > If such state isn't guaranteed, then the de-facto ABI is that the > debugger cannot simulate any SVE instruction via NO_SYSCALL since the > SVE state may be discarded. So it can only rely on what's guaranteed and > changing the behaviour later won't actually help. I think you're right. Simulating the content of the syscall does work though, and doesn't depend on whether SVE discard occurs or not. If the tracer sets the SVE regs at the syscall enter/exit traps, they could still be discarded again before the tracee reenters userspace, but that's no different from what happens in a real syscall. So I think my overall argument would be: ptrace should be as orthogonal as possible to SVE discard. Currently ptrace neither assumes that SVE discard will happen nor that it won't: it just does what the tracer asks for and tries to be safe with either and doesn't try to hide the effects from the tracer or tracee. I worry that introducing more interdependencies between ptrace and the fpsimd.c code will complicate maintenance rather than making it easier. Does that make my position clearer? If we go with this, I should add a note to the documentation explaining how NT_ARM_SVE writes interact with ptrace syscall traps. The alternative would be to forcibly discard SVE in syscall_trace_enter(), but this doesn't seem to simplify the NT_ARM_SVE implementation at all -- that code needs to work for all types of ptrace-stop, not just syscall traps: other than syscall traps, SVE is potentially live for the tracee and must not be discarded. So I'm still not clear on what the gain is. Cheers ---Dave
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index bad72fd..ee6db38 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -38,13 +38,16 @@ struct fpsimd_state { __uint128_t vregs[32]; u32 fpsr; u32 fpcr; + /* + * For ptrace compatibility, pad to next 128-bit + * boundary here if extending this struct. + */ }; }; /* the id of the last cpu to have restored this state */ unsigned int cpu; }; - #if defined(__KERNEL__) && defined(CONFIG_COMPAT) /* Masks for extracting the FPSR and FPCR from the FPSCR */ #define VFP_FPSCR_STAT_MASK 0xf800009f @@ -89,6 +92,10 @@ extern size_t sve_state_size(struct task_struct const *task); extern void sve_alloc(struct task_struct *task); extern void fpsimd_release_thread(struct task_struct *task); +extern void fpsimd_sync_to_sve(struct task_struct *task); +extern void sve_sync_to_fpsimd(struct task_struct *task); +extern void sve_sync_from_fpsimd_zeropad(struct task_struct *task); + extern int sve_set_vector_length(struct task_struct *task, unsigned long vl, unsigned long flags); @@ -105,6 +112,10 @@ extern void __init sve_setup(void); static void __maybe_unused sve_alloc(struct task_struct *task) { } static void __maybe_unused fpsimd_release_thread(struct task_struct *task) { } +static void __maybe_unused sve_sync_to_fpsimd(struct task_struct *task) { } +static void __maybe_unused sve_sync_from_fpsimd_zeropad( + struct task_struct *task) { } + static void __maybe_unused sve_init_vq_map(void) { } static void __maybe_unused sve_update_vq_map(void) { } static int __maybe_unused sve_verify_vq_map(void) { return 0; } diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index d1ff83d..a0fc6a8 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -22,6 +22,7 @@ #include <linux/types.h> #include <asm/hwcap.h> +#include <asm/sigcontext.h> /* @@ -63,6 +64,8 @@ #ifndef __ASSEMBLY__ +#include <linux/prctl.h> 
+ /* * User structures for general purpose, floating point and debug registers. */ @@ -90,6 +93,141 @@ struct user_hwdebug_state { } dbg_regs[16]; }; +/* SVE/FP/SIMD state (NT_ARM_SVE) */ + +struct user_sve_header { + __u32 size; /* total meaningful regset content in bytes */ + __u32 max_size; /* maxmium possible size for this thread */ + __u16 vl; /* current vector length */ + __u16 max_vl; /* maximum possible vector length */ + __u16 flags; + __u16 __reserved; +}; + +/* Definitions for user_sve_header.flags: */ +#define SVE_PT_REGS_MASK (1 << 0) + +#define SVE_PT_REGS_FPSIMD 0 +#define SVE_PT_REGS_SVE SVE_PT_REGS_MASK + +/* + * Common SVE_PT_* flags: + * These must be kept in sync with prctl interface in <linux/ptrace.h> + */ +#define SVE_PT_VL_INHERIT (PR_SVE_VL_INHERIT >> 16) +#define SVE_PT_VL_ONEXEC (PR_SVE_SET_VL_ONEXEC >> 16) + + +/* + * The remainder of the SVE state follows struct user_sve_header. The + * total size of the SVE state (including header) depends on the + * metadata in the header: SVE_PT_SIZE(vq, flags) gives the total size + * of the state in bytes, including the header. + * + * Refer to <asm/sigcontext.h> for details of how to pass the correct + * "vq" argument to these macros. + */ + +/* Offset from the start of struct user_sve_header to the register data */ +#define SVE_PT_REGS_OFFSET \ + ((sizeof(struct sve_context) + (SVE_VQ_BYTES - 1)) \ + / SVE_VQ_BYTES * SVE_VQ_BYTES) + +/* + * The register data content and layout depends on the value of the + * flags field. + */ + +/* + * (flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD case: + * + * The payload starts at offset SVE_PT_FPSIMD_OFFSET, and is of type + * struct user_fpsimd_state. Additional data might be appended in the + * future: use SVE_PT_FPSIMD_SIZE(vq, flags) to compute the total size. + * SVE_PT_FPSIMD_SIZE(vq, flags) will never be less than + * sizeof(struct user_fpsimd_state). 
+ */ + +#define SVE_PT_FPSIMD_OFFSET SVE_PT_REGS_OFFSET + +#define SVE_PT_FPSIMD_SIZE(vq, flags) (sizeof(struct user_fpsimd_state)) + +/* + * (flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE case: + * + * The payload starts at offset SVE_PT_SVE_OFFSET, and is of size + * SVE_PT_SVE_SIZE(vq, flags). + * + * Additional macros describe the contents and layout of the payload. + * For each, SVE_PT_SVE_x_OFFSET(args) is the start offset relative to + * the start of struct user_sve_header, and SVE_PT_SVE_x_SIZE(args) is + * the size in bytes: + * + * x type description + * - ---- ----------- + * ZREGS \ + * ZREG | + * PREGS | refer to <asm/sigcontext.h> + * PREG | + * FFR / + * + * FPSR uint32_t FPSR + * FPCR uint32_t FPCR + * + * Additional data might be appended in the future. + */ + +#define SVE_PT_SVE_ZREG_SIZE(vq) SVE_SIG_ZREG_SIZE(vq) +#define SVE_PT_SVE_PREG_SIZE(vq) SVE_SIG_PREG_SIZE(vq) +#define SVE_PT_SVE_FFR_SIZE(vq) SVE_SIG_FFR_SIZE(vq) +#define SVE_PT_SVE_FPSR_SIZE sizeof(__u32) +#define SVE_PT_SVE_FPCR_SIZE sizeof(__u32) + +#define __SVE_SIG_TO_PT(offset) \ + ((offset) - SVE_SIG_REGS_OFFSET + SVE_PT_REGS_OFFSET) + +#define SVE_PT_SVE_OFFSET SVE_PT_REGS_OFFSET + +#define SVE_PT_SVE_ZREGS_OFFSET \ + __SVE_SIG_TO_PT(SVE_SIG_ZREGS_OFFSET) +#define SVE_PT_SVE_ZREG_OFFSET(vq, n) \ + __SVE_SIG_TO_PT(SVE_SIG_ZREG_OFFSET(vq, n)) +#define SVE_PT_SVE_ZREGS_SIZE(vq) \ + (SVE_PT_SVE_ZREG_OFFSET(vq, SVE_NUM_ZREGS) - SVE_PT_SVE_ZREGS_OFFSET) + +#define SVE_PT_SVE_PREGS_OFFSET(vq) \ + __SVE_SIG_TO_PT(SVE_SIG_PREGS_OFFSET(vq)) +#define SVE_PT_SVE_PREG_OFFSET(vq, n) \ + __SVE_SIG_TO_PT(SVE_SIG_PREG_OFFSET(vq, n)) +#define SVE_PT_SVE_PREGS_SIZE(vq) \ + (SVE_PT_SVE_PREG_OFFSET(vq, SVE_NUM_PREGS) - \ + SVE_PT_SVE_PREGS_OFFSET(vq)) + +#define SVE_PT_SVE_FFR_OFFSET(vq) \ + __SVE_SIG_TO_PT(SVE_SIG_FFR_OFFSET(vq)) + +#define SVE_PT_SVE_FPSR_OFFSET(vq) \ + ((SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq) + \ + (SVE_VQ_BYTES - 1)) \ + / SVE_VQ_BYTES * SVE_VQ_BYTES) +#define 
SVE_PT_SVE_FPCR_OFFSET(vq) \ + (SVE_PT_SVE_FPSR_OFFSET(vq) + SVE_PT_SVE_FPSR_SIZE) + +/* + * Any future extension appended after FPCR must be aligned to the next + * 128-bit boundary. + */ + +#define SVE_PT_SVE_SIZE(vq, flags) \ + ((SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE \ + - SVE_PT_SVE_OFFSET + (SVE_VQ_BYTES - 1)) \ + / SVE_VQ_BYTES * SVE_VQ_BYTES) + +#define SVE_PT_SIZE(vq, flags) \ + (((flags) & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE ? \ + SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, flags) \ + : SVE_PT_FPSIMD_OFFSET + SVE_PT_FPSIMD_SIZE(vq, flags)) + #endif /* __ASSEMBLY__ */ #endif /* _UAPI__ASM_PTRACE_H */ diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index c194627..6db9f30 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -438,6 +438,66 @@ void sve_alloc(struct task_struct *task) BUG_ON(!task->thread.sve_state); } + +/* + * Ensure that task->thread.sve_state is up to date with respect to + * the user task, irrespective of when SVE is in use or not. + * + * This should only be called by ptrace. task must be non-runnable. + * task->thread.sve_state must point to at least sve_state_size(task) + * bytes of allocated kernel memory. + */ +void fpsimd_sync_to_sve(struct task_struct *task) +{ + if (!test_tsk_thread_flag(task, TIF_SVE)) + fpsimd_to_sve(task); +} + +/* + * Ensure that task->thread.fpsimd_state is up to date with respect to + * the user task, irrespective of whether SVE is in use or not. + * + * This should only be called by ptrace. task must be non-runnable. + * task->thread.sve_state must point to at least sve_state_size(task) + * bytes of allocated kernel memory. + */ +void sve_sync_to_fpsimd(struct task_struct *task) +{ + if (test_tsk_thread_flag(task, TIF_SVE)) + sve_to_fpsimd(task); +} + +/* + * Ensure that task->thread.sve_state is up to date with respect to + * the task->thread.fpsimd_state. 
+ * + * This should only be called by ptrace to merge new FPSIMD register + * values into a task for which SVE is currently active. + * task must be non-runnable. + * task->thread.sve_state must point to at least sve_state_size(task) + * bytes of allocated kernel memory. + * task->thread.fpsimd_state must already have been initialised with + * the new FPSIMD register values to be merged in. + */ +void sve_sync_from_fpsimd_zeropad(struct task_struct *task) +{ + unsigned int vq; + void *sst = task->thread.sve_state; + struct fpsimd_state const *fst = &task->thread.fpsimd_state; + unsigned int i; + + if (!test_tsk_thread_flag(task, TIF_SVE)) + return; + + vq = sve_vq_from_vl(task->thread.sve_vl); + + memset(sst, 0, SVE_SIG_REGS_SIZE(vq)); + + for (i = 0; i < 32; ++i) + memcpy(ZREG(sst, vq, i), &fst->vregs[i], + sizeof(fst->vregs[i])); +} + int sve_set_vector_length(struct task_struct *task, unsigned long vl, unsigned long flags) { diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 9cbb612..7252209 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -32,6 +32,7 @@ #include <linux/security.h> #include <linux/init.h> #include <linux/signal.h> +#include <linux/string.h> #include <linux/uaccess.h> #include <linux/perf_event.h> #include <linux/hw_breakpoint.h> @@ -40,6 +41,7 @@ #include <linux/elf.h> #include <asm/compat.h> +#include <asm/cpufeature.h> #include <asm/debug-monitors.h> #include <asm/pgtable.h> #include <asm/stacktrace.h> @@ -618,33 +620,66 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, /* * TODO: update fp accessors for lazy context switching (sync/flush hwstate) */ -static int fpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) +static int __fpr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf, 
unsigned int start_pos) { struct user_fpsimd_state *uregs; + + sve_sync_to_fpsimd(target); + uregs = &target->thread.fpsimd_state.user_fpsimd; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, + start_pos, start_pos + sizeof(*uregs)); +} + +static int fpr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ if (target == current) fpsimd_preserve_current_state(); - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, -1); + return __fpr_get(target, regset, pos, count, kbuf, ubuf, 0); } -static int fpr_set(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) +static int __fpr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf, + unsigned int start_pos) { int ret; struct user_fpsimd_state newstate = target->thread.fpsimd_state.user_fpsimd; - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate, 0, -1); + sve_sync_to_fpsimd(target); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate, + start_pos, start_pos + sizeof(newstate)); if (ret) return ret; target->thread.fpsimd_state.user_fpsimd = newstate; + + return ret; +} + +static int fpr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + ret = __fpr_set(target, regset, pos, count, kbuf, ubuf, 0); + if (ret) + return ret; + + sve_sync_from_fpsimd_zeropad(target); fpsimd_flush_task_state(target); + return ret; } @@ -702,6 +737,211 @@ static int system_call_set(struct task_struct *target, return ret; } +#ifdef CONFIG_ARM64_SVE + +static void sve_init_header_from_task(struct user_sve_header *header, + struct task_struct *target) +{ + unsigned int vq; + + memset(header, 0, 
sizeof(*header)); + + header->flags = test_tsk_thread_flag(target, TIF_SVE) ? + SVE_PT_REGS_SVE : SVE_PT_REGS_FPSIMD; + if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT)) + header->flags |= SVE_PT_VL_INHERIT; + + header->vl = target->thread.sve_vl; + vq = sve_vq_from_vl(header->vl); + + header->max_vl = sve_max_vl; + if (WARN_ON(!sve_vl_valid(sve_max_vl))) + header->max_vl = header->vl; + + header->size = SVE_PT_SIZE(vq, header->flags); + header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl), + SVE_PT_REGS_SVE); +} + +static unsigned int sve_size_from_header(struct user_sve_header const *header) +{ + return ALIGN(header->size, SVE_VQ_BYTES); +} + +static unsigned int sve_get_size(struct task_struct *target, + const struct user_regset *regset) +{ + struct user_sve_header header; + + if (!system_supports_sve()) + return 0; + + sve_init_header_from_task(&header, target); + return sve_size_from_header(&header); +} + +static int sve_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + struct user_sve_header header; + unsigned int vq; + unsigned long start, end; + + if (!system_supports_sve()) + return -EINVAL; + + /* Header */ + sve_init_header_from_task(&header, target); + vq = sve_vq_from_vl(header.vl); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &header, + 0, sizeof(header)); + if (ret) + return ret; + + if (target == current) + fpsimd_preserve_current_state(); + + /* Registers: FPSIMD-only case */ + + BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header)); + if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD) + return __fpr_get(target, regset, pos, count, kbuf, ubuf, + SVE_PT_FPSIMD_OFFSET); + + /* Otherwise: full SVE case */ + + BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header)); + start = SVE_PT_SVE_OFFSET; + end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq); + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + 
target->thread.sve_state, + start, end); + if (ret) + return ret; + + start = end; + end = SVE_PT_SVE_FPSR_OFFSET(vq); + ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + start, end); + if (ret) + return ret; + + /* + * Copy fpsr, and fpcr which must follow contiguously in + * struct fpsimd_state: + */ + start = end; + end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE; + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.fpsimd_state.fpsr, + start, end); + if (ret) + return ret; + + start = end; + end = sve_size_from_header(&header); + return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + start, end); +} + +static int sve_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + struct user_sve_header header; + unsigned int vq; + unsigned long start, end; + + if (!system_supports_sve()) + return -EINVAL; + + /* Header */ + if (count < sizeof(header)) + return -EINVAL; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &header, + 0, sizeof(header)); + if (ret) + goto out; + + /* + * Apart from PT_SVE_REGS_MASK, all PT_SVE_* flags are consumed by + * sve_set_vector_length(), which will also validate them for us: + */ + ret = sve_set_vector_length(target, header.vl, + ((unsigned long)header.flags & ~SVE_PT_REGS_MASK) << 16); + if (ret) + goto out; + + /* Actual VL set may be less than the user asked for: */ + vq = sve_vq_from_vl(target->thread.sve_vl); + + /* Registers: FPSIMD-only case */ + + BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header)); + if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD) { + sve_sync_to_fpsimd(target); + + ret = __fpr_set(target, regset, pos, count, kbuf, ubuf, + SVE_PT_FPSIMD_OFFSET); + clear_tsk_thread_flag(target, TIF_SVE); + goto out; + } + + /* Otherwise: full SVE case */ + + /* + * If setting a different VL from the requested VL and there is + * register data, the 
data layout will be wrong: don't even + * try to set the registers in this case. + */ + if (count && vq != sve_vq_from_vl(header.vl)) { + ret = -EIO; + goto out; + } + + sve_alloc(target); + fpsimd_sync_to_sve(target); + set_tsk_thread_flag(target, TIF_SVE); + + BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header)); + start = SVE_PT_SVE_OFFSET; + end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq); + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + target->thread.sve_state, + start, end); + if (ret) + goto out; + + start = end; + end = SVE_PT_SVE_FPSR_OFFSET(vq); + ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + start, end); + if (ret) + goto out; + + /* + * Copy fpsr, and fpcr which must follow contiguously in + * struct fpsimd_state: + */ + start = end; + end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.fpsimd_state.fpsr, + start, end); + +out: + fpsimd_flush_task_state(target); + return ret; +} + +#endif /* CONFIG_ARM64_SVE */ + enum aarch64_regset { REGSET_GPR, REGSET_FPR, @@ -711,6 +951,9 @@ enum aarch64_regset { REGSET_HW_WATCH, #endif REGSET_SYSTEM_CALL, +#ifdef CONFIG_ARM64_SVE + REGSET_SVE, +#endif }; static const struct user_regset aarch64_regsets[] = { @@ -768,6 +1011,18 @@ static const struct user_regset aarch64_regsets[] = { .get = system_call_get, .set = system_call_set, }, +#ifdef CONFIG_ARM64_SVE + [REGSET_SVE] = { /* Scalable Vector Extension */ + .core_note_type = NT_ARM_SVE, + .n = DIV_ROUND_UP(SVE_PT_SIZE(SVE_VQ_MAX, SVE_PT_REGS_SVE), + SVE_VQ_BYTES), + .size = SVE_VQ_BYTES, + .align = SVE_VQ_BYTES, + .get = sve_get, + .set = sve_set, + .get_size = sve_get_size, + }, +#endif }; static const struct user_regset_view user_aarch64_view = { diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index b5280db..735b8f4 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -416,6 +416,7 @@ typedef struct elf64_shdr { #define 
NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */ #define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */ #define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */ +#define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension registers */ #define NT_METAG_CBUF 0x500 /* Metag catch buffer registers */ #define NT_METAG_RPIPE 0x501 /* Metag read pipeline state */ #define NT_METAG_TLS 0x502 /* Metag TLS pointer */