sound/core/pcm_compat.c | 40 +++++++++++++++++++++++++++------------- sound/core/pcm_native.c | 40 +++++++++++++++++++++++++++------------- 2 files changed, 54 insertions(+), 26 deletions(-)
With user access protection (Called SMAP on x86 or KUAP on powerpc)
each and every call to get_user() or put_user() performs heavy
operations to unlock and lock kernel access to userspace.
SNDRV_PCM_IOCTL_SYNC_PTR ioctl is a hot path that needs to be
optimised. To do that, perform user accesses by blocks using
user_access_begin/user_access_end() and unsafe_get_user()/
unsafe_put_user() and alike.
Before the patch the 9 calls to put_user() at the
end of snd_pcm_ioctl_sync_ptr_compat() imply the following set of
instructions about 9 times (access_ok - enable user - write - disable
user):
0.00 : c057f858: 3d 20 7f ff lis r9,32767
0.29 : c057f85c: 39 5e 00 14 addi r10,r30,20
0.77 : c057f860: 61 29 ff fc ori r9,r9,65532
0.32 : c057f864: 7c 0a 48 40 cmplw r10,r9
0.36 : c057f868: 41 a1 fb 58 bgt c057f3c0 <snd_pcm_ioctl+0xbb0>
0.30 : c057f86c: 3d 20 dc 00 lis r9,-9216
1.95 : c057f870: 7d 3a c3 a6 mtspr 794,r9
0.33 : c057f874: 92 8a 00 00 stw r20,0(r10)
0.27 : c057f878: 3d 20 de 00 lis r9,-8704
0.28 : c057f87c: 7d 3a c3 a6 mtspr 794,r9
...
A perf profile shows that in total the 9 put_user() represent 36% of
the time spent in snd_pcm_ioctl() and about 80 instructions.
With this patch everything is done in 13 instructions and represent
only 15% of the time spent in snd_pcm_ioctl():
0.57 : c057f5dc: 3d 20 dc 00 lis r9,-9216
0.98 : c057f5e0: 7d 3a c3 a6 mtspr 794,r9
0.16 : c057f5e4: 92 7f 00 04 stw r19,4(r31)
0.63 : c057f5e8: 93 df 00 0c stw r30,12(r31)
0.16 : c057f5ec: 93 9f 00 10 stw r28,16(r31)
4.95 : c057f5f0: 92 9f 00 14 stw r20,20(r31)
0.19 : c057f5f4: 92 5f 00 18 stw r18,24(r31)
0.49 : c057f5f8: 92 bf 00 1c stw r21,28(r31)
0.27 : c057f5fc: 93 7f 00 20 stw r27,32(r31)
5.88 : c057f600: 93 36 00 00 stw r25,0(r22)
0.11 : c057f604: 93 17 00 00 stw r24,0(r23)
0.00 : c057f608: 3d 20 de 00 lis r9,-8704
0.79 : c057f60c: 7d 3a c3 a6 mtspr 794,r9
Note that here the access_ok() in user_write_access_begin() is skipped
because the exact same verification has already been performed at the
beginning of the fonction with the call to user_read_access_begin().
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
This is a lighter version of previous patch "[PATCH v2] ALSA: pcm: Convert multiple {get/put}_user to user_access_begin/user_access_end()" focussing on identified hot path.
Moved and nested the failure labels closer in order to increase readability
Link: https://patchwork.ozlabs.org/project/linuxppc-dev/patch/d2609397eafc2b55ec1f44a3f30ccec00e0c7f6e.1749455639.git.christophe.leroy@csgroup.eu/
sound/core/pcm_compat.c | 40 +++++++++++++++++++++++++++-------------
sound/core/pcm_native.c | 40 +++++++++++++++++++++++++++-------------
2 files changed, 54 insertions(+), 26 deletions(-)
diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c
index a42ec7f5a1da..348a72e6499e 100644
--- a/sound/core/pcm_compat.c
+++ b/sound/core/pcm_compat.c
@@ -418,10 +418,18 @@ static int snd_pcm_ioctl_sync_ptr_x32(struct snd_pcm_substream *substream,
if (snd_BUG_ON(!runtime))
return -EINVAL;
- if (get_user(sflags, &src->flags) ||
- get_user(scontrol.appl_ptr, &src->c.control.appl_ptr) ||
- get_user(scontrol.avail_min, &src->c.control.avail_min))
+ if (!user_read_access_begin(src, sizeof(*src)))
return -EFAULT;
+ err = -EFAULT;
+ unsafe_get_user(sflags, &src->flags, Efault_rd);
+ unsafe_get_user(scontrol.appl_ptr, &src->c.control.appl_ptr, Efault_rd);
+ unsafe_get_user(scontrol.avail_min, &src->c.control.avail_min, Efault_rd);
+ err = 0;
+Efault_rd:
+ user_read_access_end();
+ if (err)
+ return err;
+
if (sflags & SNDRV_PCM_SYNC_PTR_HWSYNC) {
err = snd_pcm_hwsync(substream);
if (err < 0)
@@ -450,18 +458,24 @@ static int snd_pcm_ioctl_sync_ptr_x32(struct snd_pcm_substream *substream,
}
if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL))
snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_DEVICE);
- if (put_user(sstatus.state, &src->s.status.state) ||
- put_user(sstatus.hw_ptr, &src->s.status.hw_ptr) ||
- put_user(sstatus.tstamp.tv_sec, &src->s.status.tstamp_sec) ||
- put_user(sstatus.tstamp.tv_nsec, &src->s.status.tstamp_nsec) ||
- put_user(sstatus.suspended_state, &src->s.status.suspended_state) ||
- put_user(sstatus.audio_tstamp.tv_sec, &src->s.status.audio_tstamp_sec) ||
- put_user(sstatus.audio_tstamp.tv_nsec, &src->s.status.audio_tstamp_nsec) ||
- put_user(scontrol.appl_ptr, &src->c.control.appl_ptr) ||
- put_user(scontrol.avail_min, &src->c.control.avail_min))
+
+ if (!user_write_access_begin(src, sizeof(*src)))
return -EFAULT;
+ err = -EFAULT;
+ unsafe_put_user(sstatus.state, &src->s.status.state, Efault_wr);
+ unsafe_put_user(sstatus.hw_ptr, &src->s.status.hw_ptr, Efault_wr);
+ unsafe_put_user(sstatus.tstamp.tv_sec, &src->s.status.tstamp_sec, Efault_wr);
+ unsafe_put_user(sstatus.tstamp.tv_nsec, &src->s.status.tstamp_nsec, Efault_wr);
+ unsafe_put_user(sstatus.suspended_state, &src->s.status.suspended_state, Efault_wr);
+ unsafe_put_user(sstatus.audio_tstamp.tv_sec, &src->s.status.audio_tstamp_sec, Efault_wr);
+ unsafe_put_user(sstatus.audio_tstamp.tv_nsec, &src->s.status.audio_tstamp_nsec, Efault_wr);
+ unsafe_put_user(scontrol.appl_ptr, &src->c.control.appl_ptr, Efault_wr);
+ unsafe_put_user(scontrol.avail_min, &src->c.control.avail_min), Efault_wr);
+ err = 0;
+Efault_wr:
+ user_write_access_end();
- return 0;
+ return err;
}
#endif /* CONFIG_X86_X32_ABI */
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index ecb71bf1859d..b104faddb6e3 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -3165,10 +3165,18 @@ static int snd_pcm_ioctl_sync_ptr_compat(struct snd_pcm_substream *substream,
if (snd_BUG_ON(!runtime))
return -EINVAL;
- if (get_user(sflags, &src->flags) ||
- get_user(scontrol.appl_ptr, &src->c.control.appl_ptr) ||
- get_user(scontrol.avail_min, &src->c.control.avail_min))
+ if (!user_read_access_begin(src, sizeof(*src)))
return -EFAULT;
+ err = -EFAULT;
+ unsafe_get_user(sflags, &src->flags, Efault_rd);
+ unsafe_get_user(scontrol.appl_ptr, &src->c.control.appl_ptr, Efault_rd);
+ unsafe_get_user(scontrol.avail_min, &src->c.control.avail_min, Efault_rd);
+ err = 0;
+Efault_rd:
+ user_read_access_end();
+ if (err)
+ return err;
+
if (sflags & SNDRV_PCM_SYNC_PTR_HWSYNC) {
err = snd_pcm_hwsync(substream);
if (err < 0)
@@ -3200,18 +3208,24 @@ static int snd_pcm_ioctl_sync_ptr_compat(struct snd_pcm_substream *substream,
}
if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL))
snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_DEVICE);
- if (put_user(sstatus.state, &src->s.status.state) ||
- put_user(sstatus.hw_ptr, &src->s.status.hw_ptr) ||
- put_user(sstatus.tstamp.tv_sec, &src->s.status.tstamp_sec) ||
- put_user(sstatus.tstamp.tv_nsec, &src->s.status.tstamp_nsec) ||
- put_user(sstatus.suspended_state, &src->s.status.suspended_state) ||
- put_user(sstatus.audio_tstamp.tv_sec, &src->s.status.audio_tstamp_sec) ||
- put_user(sstatus.audio_tstamp.tv_nsec, &src->s.status.audio_tstamp_nsec) ||
- put_user(scontrol.appl_ptr, &src->c.control.appl_ptr) ||
- put_user(scontrol.avail_min, &src->c.control.avail_min))
+
+ if (!user_write_access_begin(src, sizeof(*src)))
return -EFAULT;
+ err = -EFAULT;
+ unsafe_put_user(sstatus.state, &src->s.status.state, Efault_wr);
+ unsafe_put_user(sstatus.hw_ptr, &src->s.status.hw_ptr, Efault_wr);
+ unsafe_put_user(sstatus.tstamp.tv_sec, &src->s.status.tstamp_sec, Efault_wr);
+ unsafe_put_user(sstatus.tstamp.tv_nsec, &src->s.status.tstamp_nsec, Efault_wr);
+ unsafe_put_user(sstatus.suspended_state, &src->s.status.suspended_state, Efault_wr);
+ unsafe_put_user(sstatus.audio_tstamp.tv_sec, &src->s.status.audio_tstamp_sec, Efault_wr);
+ unsafe_put_user(sstatus.audio_tstamp.tv_nsec, &src->s.status.audio_tstamp_nsec, Efault_wr);
+ unsafe_put_user(scontrol.appl_ptr, &src->c.control.appl_ptr, Efault_wr);
+ unsafe_put_user(scontrol.avail_min, &src->c.control.avail_min, Efault_wr);
+ err = 0;
+Efault_wr:
+ user_write_access_end();
- return 0;
+ return err;
}
#define __SNDRV_PCM_IOCTL_SYNC_PTR32 _IOWR('A', 0x23, struct snd_pcm_sync_ptr32)
--
2.47.0
Hi Christophe, kernel test robot noticed the following build errors: [auto build test ERROR on tiwai-sound/for-next] [also build test ERROR on tiwai-sound/for-linus linus/master v6.16-rc1 next-20250613] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Christophe-Leroy/ALSA-pcm-Convert-snd_pcm_ioctl_sync_ptr_-compat-x32-to-user_access_begin-user_access_end/20250612-185240 base: https://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound.git for-next patch link: https://lore.kernel.org/r/8df11af98033e4cb4d9b0f16d6e9d5b69110b036.1749724057.git.christophe.leroy%40csgroup.eu patch subject: [PATCH] ALSA: pcm: Convert snd_pcm_ioctl_sync_ptr_{compat/x32} to user_access_begin/user_access_end() config: x86_64-randconfig-008-20250615 (https://download.01.org/0day-ci/archive/20250615/202506151632.jvLtNHPb-lkp@intel.com/config) compiler: gcc-12 (Debian 12.2.0-14) 12.2.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250615/202506151632.jvLtNHPb-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202506151632.jvLtNHPb-lkp@intel.com/ All errors (new ones prefixed by >>): In file included from sound/core/pcm_native.c:4033: sound/core/pcm_compat.c: In function 'snd_pcm_ioctl_sync_ptr_x32': >> sound/core/pcm_compat.c:473:70: error: macro "unsafe_put_user" requires 3 arguments, but only 2 given 473 | unsafe_put_user(scontrol.avail_min, &src->c.control.avail_min), Efault_wr); | ^ In file included from include/linux/uaccess.h:12, from include/linux/sched/task.h:13, from include/linux/sched/signal.h:9, from include/linux/rcuwait.h:6, from include/linux/percpu-rwsem.h:7, from include/linux/fs.h:34, from include/linux/compat.h:17, from sound/core/pcm_native.c:7: arch/x86/include/asm/uaccess.h:532: note: macro "unsafe_put_user" defined here 532 | #define unsafe_put_user(x, ptr, label) \ | >> sound/core/pcm_compat.c:473:9: error: 'unsafe_put_user' undeclared (first use in this function) 473 | unsafe_put_user(scontrol.avail_min, &src->c.control.avail_min), Efault_wr); | ^~~~~~~~~~~~~~~ sound/core/pcm_compat.c:473:9: note: each undeclared identifier is reported only once for each function it appears in >> sound/core/pcm_compat.c:473:73: error: 'Efault_wr' undeclared (first use in this function) 473 | unsafe_put_user(scontrol.avail_min, &src->c.control.avail_min), Efault_wr); | ^~~~~~~~~ sound/core/pcm_compat.c:473:71: warning: left-hand operand of comma expression has no effect [-Wunused-value] 473 | unsafe_put_user(scontrol.avail_min, &src->c.control.avail_min), Efault_wr); | ^ >> sound/core/pcm_compat.c:473:82: error: expected ';' before ')' token 473 | unsafe_put_user(scontrol.avail_min, &src->c.control.avail_min), Efault_wr); | ^ | ; >> sound/core/pcm_compat.c:473:82: error: expected statement before ')' token vim +/unsafe_put_user +473 sound/core/pcm_compat.c 405 406 static int snd_pcm_ioctl_sync_ptr_x32(struct snd_pcm_substream *substream, 407 struct snd_pcm_sync_ptr_x32 __user *src) 408 { 409 struct snd_pcm_runtime *runtime = substream->runtime; 410 volatile struct snd_pcm_mmap_status *status; 411 volatile struct snd_pcm_mmap_control *control; 412 u32 sflags; 413 struct snd_pcm_mmap_control scontrol; 414 struct snd_pcm_mmap_status sstatus; 415 snd_pcm_uframes_t boundary; 416 int err; 417 418 if (snd_BUG_ON(!runtime)) 419 return -EINVAL; 420 421 if (!user_read_access_begin(src, sizeof(*src))) 422 return -EFAULT; 423 err = -EFAULT; 424 unsafe_get_user(sflags, &src->flags, Efault_rd); 425 unsafe_get_user(scontrol.appl_ptr, &src->c.control.appl_ptr, Efault_rd); 426 unsafe_get_user(scontrol.avail_min, &src->c.control.avail_min, Efault_rd); 427 err = 0; 428 Efault_rd: 429 user_read_access_end(); 430 if (err) 431 return err; 432 433 if (sflags & SNDRV_PCM_SYNC_PTR_HWSYNC) { 434 err = snd_pcm_hwsync(substream); 435 if (err < 0) 436 return err; 437 } 438 status = runtime->status; 439 control = runtime->control; 440 boundary = recalculate_boundary(runtime); 441 if (!boundary) 442 boundary = 0x7fffffff; 443 scoped_guard(pcm_stream_lock_irq, substream) { 444 /* FIXME: we should consider the boundary for the sync from app */ 445 if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL)) 446 control->appl_ptr = scontrol.appl_ptr; 447 else 448 scontrol.appl_ptr = control->appl_ptr % boundary; 449 if (!(sflags & SNDRV_PCM_SYNC_PTR_AVAIL_MIN)) 450 control->avail_min = scontrol.avail_min; 451 else 452 scontrol.avail_min = control->avail_min; 453 sstatus.state = status->state; 454 sstatus.hw_ptr = status->hw_ptr % boundary; 455 sstatus.tstamp = status->tstamp; 456 sstatus.suspended_state = status->suspended_state; 457 sstatus.audio_tstamp = status->audio_tstamp; 458 } 459 if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL)) 460 snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_DEVICE); 461 462 if (!user_write_access_begin(src, sizeof(*src))) 463 return -EFAULT; 464 err = -EFAULT; 465 unsafe_put_user(sstatus.state, &src->s.status.state, Efault_wr); 466 unsafe_put_user(sstatus.hw_ptr, &src->s.status.hw_ptr, Efault_wr); 467 unsafe_put_user(sstatus.tstamp.tv_sec, &src->s.status.tstamp_sec, Efault_wr); 468 unsafe_put_user(sstatus.tstamp.tv_nsec, &src->s.status.tstamp_nsec, Efault_wr); 469 unsafe_put_user(sstatus.suspended_state, &src->s.status.suspended_state, Efault_wr); 470 unsafe_put_user(sstatus.audio_tstamp.tv_sec, &src->s.status.audio_tstamp_sec, Efault_wr); 471 unsafe_put_user(sstatus.audio_tstamp.tv_nsec, &src->s.status.audio_tstamp_nsec, Efault_wr); 472 unsafe_put_user(scontrol.appl_ptr, &src->c.control.appl_ptr, Efault_wr); > 473 unsafe_put_user(scontrol.avail_min, &src->c.control.avail_min), Efault_wr); 474 err = 0; 475 Efault_wr: 476 user_write_access_end(); 477 478 return err; 479 } 480 #endif /* CONFIG_X86_X32_ABI */ 481 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
On Thu, 12 Jun 2025 12:39:39 +0200, Christophe Leroy wrote: > > With user access protection (Called SMAP on x86 or KUAP on powerpc) > each and every call to get_user() or put_user() performs heavy > operations to unlock and lock kernel access to userspace. > > SNDRV_PCM_IOCTL_SYNC_PTR ioctl is a hot path that needs to be > optimised. To do that, perform user accesses by blocks using > user_access_begin/user_access_end() and unsafe_get_user()/ > unsafe_put_user() and alike. > > Before the patch the 9 calls to put_user() at the > end of snd_pcm_ioctl_sync_ptr_compat() imply the following set of > instructions about 9 times (access_ok - enable user - write - disable > user): > 0.00 : c057f858: 3d 20 7f ff lis r9,32767 > 0.29 : c057f85c: 39 5e 00 14 addi r10,r30,20 > 0.77 : c057f860: 61 29 ff fc ori r9,r9,65532 > 0.32 : c057f864: 7c 0a 48 40 cmplw r10,r9 > 0.36 : c057f868: 41 a1 fb 58 bgt c057f3c0 <snd_pcm_ioctl+0xbb0> > 0.30 : c057f86c: 3d 20 dc 00 lis r9,-9216 > 1.95 : c057f870: 7d 3a c3 a6 mtspr 794,r9 > 0.33 : c057f874: 92 8a 00 00 stw r20,0(r10) > 0.27 : c057f878: 3d 20 de 00 lis r9,-8704 > 0.28 : c057f87c: 7d 3a c3 a6 mtspr 794,r9 > ... > > A perf profile shows that in total the 9 put_user() represent 36% of > the time spent in snd_pcm_ioctl() and about 80 instructions. > > With this patch everything is done in 13 instructions and represent > only 15% of the time spent in snd_pcm_ioctl(): > > 0.57 : c057f5dc: 3d 20 dc 00 lis r9,-9216 > 0.98 : c057f5e0: 7d 3a c3 a6 mtspr 794,r9 > 0.16 : c057f5e4: 92 7f 00 04 stw r19,4(r31) > 0.63 : c057f5e8: 93 df 00 0c stw r30,12(r31) > 0.16 : c057f5ec: 93 9f 00 10 stw r28,16(r31) > 4.95 : c057f5f0: 92 9f 00 14 stw r20,20(r31) > 0.19 : c057f5f4: 92 5f 00 18 stw r18,24(r31) > 0.49 : c057f5f8: 92 bf 00 1c stw r21,28(r31) > 0.27 : c057f5fc: 93 7f 00 20 stw r27,32(r31) > 5.88 : c057f600: 93 36 00 00 stw r25,0(r22) > 0.11 : c057f604: 93 17 00 00 stw r24,0(r23) > 0.00 : c057f608: 3d 20 de 00 lis r9,-8704 > 0.79 : c057f60c: 7d 3a c3 a6 mtspr 794,r9 > > Note that here the access_ok() in user_write_access_begin() is skipped > because the exact same verification has already been performed at the > beginning of the fonction with the call to user_read_access_begin(). > > Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> > --- > This is a lighter version of previous patch "[PATCH v2] ALSA: pcm: Convert multiple {get/put}_user to user_access_begin/user_access_end()" focussing on identified hot path. > Moved and nested the failure labels closer in order to increase readability Thanks for the revised patch! Although it's now much lighter, I still believe that we can reduce get_user() / put_user() calls significantly by adjusting the struct usage. Could you check whether the patch below can improve? (Zero-ing of sstatus can be an overhead here, but there are some holes, and these need to be initialized before copying back...) Takashi --- a/sound/core/pcm_compat.c +++ b/sound/core/pcm_compat.c @@ -410,8 +410,8 @@ static int snd_pcm_ioctl_sync_ptr_x32(struct snd_pcm_substream *substream, volatile struct snd_pcm_mmap_status *status; volatile struct snd_pcm_mmap_control *control; u32 sflags; - struct snd_pcm_mmap_control scontrol; - struct snd_pcm_mmap_status sstatus; + struct snd_pcm_mmap_control_x32 scontrol; + struct snd_pcm_mmap_status_x32 sstatus = {}; snd_pcm_uframes_t boundary; int err; @@ -419,8 +419,7 @@ static int snd_pcm_ioctl_sync_ptr_x32(struct snd_pcm_substream *substream, return -EINVAL; if (get_user(sflags, &src->flags) || - get_user(scontrol.appl_ptr, &src->c.control.appl_ptr) || - get_user(scontrol.avail_min, &src->c.control.avail_min)) + copy_from_user(&scontrol, &src->c.control)) return -EFAULT; if (sflags & SNDRV_PCM_SYNC_PTR_HWSYNC) { err = snd_pcm_hwsync(substream); @@ -444,21 +443,16 @@ static int snd_pcm_ioctl_sync_ptr_x32(struct snd_pcm_substream *substream, scontrol.avail_min = control->avail_min; sstatus.state = status->state; sstatus.hw_ptr = status->hw_ptr % boundary; - sstatus.tstamp = status->tstamp; + sstatus.tstamp_sec = status->tstamp.tv_sec; + sstatus.tstamp_nsec = status->tstamp.tv_nsec; sstatus.suspended_state = status->suspended_state; - sstatus.audio_tstamp = status->audio_tstamp; + sstatus.audio_tstamp_sec = status->audio_tstamp.tv_sec; + sstatus.audio_tstamp_nsec = status->audio_tstamp.tv_nsec; } if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL)) snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_DEVICE); - if (put_user(sstatus.state, &src->s.status.state) || - put_user(sstatus.hw_ptr, &src->s.status.hw_ptr) || - put_user(sstatus.tstamp.tv_sec, &src->s.status.tstamp_sec) || - put_user(sstatus.tstamp.tv_nsec, &src->s.status.tstamp_nsec) || - put_user(sstatus.suspended_state, &src->s.status.suspended_state) || - put_user(sstatus.audio_tstamp.tv_sec, &src->s.status.audio_tstamp_sec) || - put_user(sstatus.audio_tstamp.tv_nsec, &src->s.status.audio_tstamp_nsec) || - put_user(scontrol.appl_ptr, &src->c.control.appl_ptr) || - put_user(scontrol.avail_min, &src->c.control.avail_min)) + if (copy_to_user(&src->s.status, &sstatus, sizeof(sstatus)) || + copy_to_user(&src->c.control, &scontrol, sizeof(scontrol))) return -EFAULT; return 0; --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -3157,8 +3157,8 @@ static int snd_pcm_ioctl_sync_ptr_compat(struct snd_pcm_substream *substream, volatile struct snd_pcm_mmap_status *status; volatile struct snd_pcm_mmap_control *control; u32 sflags; - struct snd_pcm_mmap_control scontrol; - struct snd_pcm_mmap_status sstatus; + struct snd_pcm_mmap_control32 scontrol; + struct snd_pcm_mmap_status32 sstatus = {}; snd_pcm_uframes_t boundary; int err; @@ -3166,8 +3166,7 @@ static int snd_pcm_ioctl_sync_ptr_compat(struct snd_pcm_substream *substream, return -EINVAL; if (get_user(sflags, &src->flags) || - get_user(scontrol.appl_ptr, &src->c.control.appl_ptr) || - get_user(scontrol.avail_min, &src->c.control.avail_min)) + copy_from_user(&scontrol, &src->c.control, sizeof(scontrol))) return -EFAULT; if (sflags & SNDRV_PCM_SYNC_PTR_HWSYNC) { err = snd_pcm_hwsync(substream); @@ -3194,21 +3193,16 @@ static int snd_pcm_ioctl_sync_ptr_compat(struct snd_pcm_substream *substream, scontrol.avail_min = control->avail_min; sstatus.state = status->state; sstatus.hw_ptr = status->hw_ptr % boundary; - sstatus.tstamp = status->tstamp; + sstatus.tstamp_sec = status->tstamp.tv_sec; + sstatus.tstamp_nsec = status->tstamp.tv_nsec; sstatus.suspended_state = status->suspended_state; - sstatus.audio_tstamp = status->audio_tstamp; + sstatus.audio_tstamp_sec = status->audio_tstamp.tv_sec; + sstatus.audio_tstamp_nsec = status->audio_tstamp.tv_nsec; } if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL)) snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_DEVICE); - if (put_user(sstatus.state, &src->s.status.state) || - put_user(sstatus.hw_ptr, &src->s.status.hw_ptr) || - put_user(sstatus.tstamp.tv_sec, &src->s.status.tstamp_sec) || - put_user(sstatus.tstamp.tv_nsec, &src->s.status.tstamp_nsec) || - put_user(sstatus.suspended_state, &src->s.status.suspended_state) || - put_user(sstatus.audio_tstamp.tv_sec, &src->s.status.audio_tstamp_sec) || - put_user(sstatus.audio_tstamp.tv_nsec, &src->s.status.audio_tstamp_nsec) || - put_user(scontrol.appl_ptr, &src->c.control.appl_ptr) || - put_user(scontrol.avail_min, &src->c.control.avail_min)) + if (copy_to_user(&src->s.status, &sstatus, sizeof(sstatus)) || + copy_to_user(&src->c.control, &scontrol, sizeof(scontrol))) return -EFAULT; return 0;
Le 12/06/2025 à 13:02, Takashi Iwai a écrit : > On Thu, 12 Jun 2025 12:39:39 +0200, > Christophe Leroy wrote: >> >> With user access protection (Called SMAP on x86 or KUAP on powerpc) >> each and every call to get_user() or put_user() performs heavy >> operations to unlock and lock kernel access to userspace. >> >> SNDRV_PCM_IOCTL_SYNC_PTR ioctl is a hot path that needs to be >> optimised. To do that, perform user accesses by blocks using >> user_access_begin/user_access_end() and unsafe_get_user()/ >> unsafe_put_user() and alike. >> >> Before the patch the 9 calls to put_user() at the >> end of snd_pcm_ioctl_sync_ptr_compat() imply the following set of >> instructions about 9 times (access_ok - enable user - write - disable >> user): >> 0.00 : c057f858: 3d 20 7f ff lis r9,32767 >> 0.29 : c057f85c: 39 5e 00 14 addi r10,r30,20 >> 0.77 : c057f860: 61 29 ff fc ori r9,r9,65532 >> 0.32 : c057f864: 7c 0a 48 40 cmplw r10,r9 >> 0.36 : c057f868: 41 a1 fb 58 bgt c057f3c0 <snd_pcm_ioctl+0xbb0> >> 0.30 : c057f86c: 3d 20 dc 00 lis r9,-9216 >> 1.95 : c057f870: 7d 3a c3 a6 mtspr 794,r9 >> 0.33 : c057f874: 92 8a 00 00 stw r20,0(r10) >> 0.27 : c057f878: 3d 20 de 00 lis r9,-8704 >> 0.28 : c057f87c: 7d 3a c3 a6 mtspr 794,r9 >> ... >> >> A perf profile shows that in total the 9 put_user() represent 36% of >> the time spent in snd_pcm_ioctl() and about 80 instructions. >> >> With this patch everything is done in 13 instructions and represent >> only 15% of the time spent in snd_pcm_ioctl(): >> >> 0.57 : c057f5dc: 3d 20 dc 00 lis r9,-9216 >> 0.98 : c057f5e0: 7d 3a c3 a6 mtspr 794,r9 >> 0.16 : c057f5e4: 92 7f 00 04 stw r19,4(r31) >> 0.63 : c057f5e8: 93 df 00 0c stw r30,12(r31) >> 0.16 : c057f5ec: 93 9f 00 10 stw r28,16(r31) >> 4.95 : c057f5f0: 92 9f 00 14 stw r20,20(r31) >> 0.19 : c057f5f4: 92 5f 00 18 stw r18,24(r31) >> 0.49 : c057f5f8: 92 bf 00 1c stw r21,28(r31) >> 0.27 : c057f5fc: 93 7f 00 20 stw r27,32(r31) >> 5.88 : c057f600: 93 36 00 00 stw r25,0(r22) >> 0.11 : c057f604: 93 17 00 00 stw r24,0(r23) >> 0.00 : c057f608: 3d 20 de 00 lis r9,-8704 >> 0.79 : c057f60c: 7d 3a c3 a6 mtspr 794,r9 >> >> Note that here the access_ok() in user_write_access_begin() is skipped >> because the exact same verification has already been performed at the >> beginning of the fonction with the call to user_read_access_begin(). >> >> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> >> --- >> This is a lighter version of previous patch "[PATCH v2] ALSA: pcm: Convert multiple {get/put}_user to user_access_begin/user_access_end()" focussing on identified hot path. >> Moved and nested the failure labels closer in order to increase readability > > Thanks for the revised patch! > > Although it's now much lighter, I still believe that we can reduce > get_user() / put_user() calls significantly by adjusting the struct > usage. > > Could you check whether the patch below can improve? > (Zero-ing of sstatus can be an overhead here, but there are some > holes, and these need to be initialized before copying back...) > Thanks for the proposed patch. Unfortunately it doesn't improve the situation. The problem with copy_from_user() and copy_to_user() is that they perform quite bad on small regions. And for the from_user side we still get two user access enable/disable instead 3 and for the to_user side we still get two as well (Allthough we had 7 previously). Those 4 user access enable/disable still have a cost. Nowadays the tendency is really to go for the unsafe_put/get_user style, see some significant exemples below. And as mentioned in those commits, behind the performance improvement it also lead to much cleaner code generation. - 38ebcf5096a8 ("scm: optimize put_cmsg()") - 9f79b78ef744 ("Convert filldir[64]() from __put_user() to unsafe_put_user()") - ef0ba0553829 ("poll: fix performance regression due to out-of-line __put_user()") Commit 38ebcf5096a8 is explicit about copy_to_user() being bad for small regions. Here below is some comparison between the three way of doing snd_pcm_ioctl_sync_ptr_compat(), output is from 'perf top': Initially I got the following. That 12%+ is the reason why I started investigating. 14.20% test_perf [.] engine_main ==> 12.86% [kernel] [k] snd_pcm_ioctl 11.91% [kernel] [k] finish_task_switch.isra.0 4.15% [kernel] [k] snd_pcm_group_unlock_irq.part.0 4.07% libc.so.6 [.] __ioctl_time64 3.58% libasound.so.2.0.0 [.] __snd_pcm_mmap_begin_generic 3.37% [kernel] [k] sys_ioctl 2.96% libasound.so.2.0.0 [.] snd_pcm_hw_avail_update 2.73% libasound.so.2.0.0 [.] __snd_pcm_mmap_begin 2.58% [kernel] [k] system_call_exception 1.93% libasound.so.2.0.0 [.] sync_ptr1 1.85% libasound.so.2.0.0 [.] snd_pcm_unlock 1.84% libasound.so.2.0.0 [.] snd_pcm_mmap_begin 1.83% libasound.so.2.0.0 [.] bad_pcm_state 1.68% libasound.so.2.0.0 [.] snd_pcm_mmap_avail 1.67% libasound.so.2.0.0 [.] snd_pcm_avail_update With _your_ patch I get the following. copy_from_user() calls _copy_from_user() and copy_to_user() calls _copy_to_user(). Both then call __copy_tofrom_user(). In total it is 16.4% so it is worse than before. 14.47% test_perf [.] engine_main 12.00% [kernel] [k] finish_task_switch.isra.0 ==> 8.37% [kernel] [k] snd_pcm_ioctl 5.44% libc.so.6 [.] __ioctl_time64 5.03% [kernel] [k] snd_pcm_group_unlock_irq.part.0 ==> 4.86% [kernel] [k] __copy_tofrom_user 4.62% [kernel] [k] sys_ioctl 3.22% [kernel] [k] system_call_exception 2.42% libasound.so.2.0.0 [.] snd_pcm_mmap_begin 2.31% [kernel] [k] fdget 2.23% libasound.so.2.0.0 [.] __snd_pcm_mmap_begin_generic 2.19% [kernel] [k] syscall_exit_prepare 1.92% libasound.so.2.0.0 [.] snd_pcm_mmap_avail 1.86% libasound.so.2.0.0 [.] __snd_pcm_mmap_begin 1.68% libasound.so.2.0.0 [.] snd_pcm_hw_avail_update ==> 1.67% [kernel] [k] _copy_from_user 1.66% libasound.so.2.0.0 [.] bad_pcm_state ==> 1.53% [kernel] [k] _copy_to_user 1.40% libasound.so.2.0.0 [.] sync_ptr1 With my patch I get the following: 17.46% test_perf [.] engine_main 9.14% [kernel] [k] finish_task_switch.isra.0 ==> 4.92% [kernel] [k] snd_pcm_ioctl 3.99% [kernel] [k] snd_pcm_group_unlock_irq.part.0 3.71% libc.so.6 [.] __ioctl_time64 3.61% libasound.so.2.0.0 [.] __snd_pcm_mmap_begin_generic 2.72% libasound.so.2.0.0 [.] sync_ptr1 2.65% [kernel] [k] system_call_exception 2.46% [kernel] [k] sys_ioctl 2.43% [kernel] [k] __rseq_handle_notify_resume 2.34% [kernel] [k] do_epoll_wait 2.30% libasound.so.2.0.0 [.] __snd_pcm_mmap_commit 2.14% libasound.so.2.0.0 [.] __snd_pcm_avail 2.04% libasound.so.2.0.0 [.] snd_pcm_hw_avail_update 1.89% libasound.so.2.0.0 [.] snd_pcm_lock 1.84% libasound.so.2.0.0 [.] snd_pcm_mmap_avail 1.76% libasound.so.2.0.0 [.] __snd_pcm_avail_update 1.61% libasound.so.2.0.0 [.] bad_pcm_state 1.60% libasound.so.2.0.0 [.] __snd_pcm_mmap_begin 1.49% libasound.so.2.0.0 [.] query_status_data Christophe
On Fri, 13 Jun 2025 07:24:46 +0200, Christophe Leroy wrote: > > > > Le 12/06/2025 à 13:02, Takashi Iwai a écrit : > > On Thu, 12 Jun 2025 12:39:39 +0200, > > Christophe Leroy wrote: > >> > >> With user access protection (Called SMAP on x86 or KUAP on powerpc) > >> each and every call to get_user() or put_user() performs heavy > >> operations to unlock and lock kernel access to userspace. > >> > >> SNDRV_PCM_IOCTL_SYNC_PTR ioctl is a hot path that needs to be > >> optimised. To do that, perform user accesses by blocks using > >> user_access_begin/user_access_end() and unsafe_get_user()/ > >> unsafe_put_user() and alike. > >> > >> Before the patch the 9 calls to put_user() at the > >> end of snd_pcm_ioctl_sync_ptr_compat() imply the following set of > >> instructions about 9 times (access_ok - enable user - write - disable > >> user): > >> 0.00 : c057f858: 3d 20 7f ff lis r9,32767 > >> 0.29 : c057f85c: 39 5e 00 14 addi r10,r30,20 > >> 0.77 : c057f860: 61 29 ff fc ori r9,r9,65532 > >> 0.32 : c057f864: 7c 0a 48 40 cmplw r10,r9 > >> 0.36 : c057f868: 41 a1 fb 58 bgt c057f3c0 <snd_pcm_ioctl+0xbb0> > >> 0.30 : c057f86c: 3d 20 dc 00 lis r9,-9216 > >> 1.95 : c057f870: 7d 3a c3 a6 mtspr 794,r9 > >> 0.33 : c057f874: 92 8a 00 00 stw r20,0(r10) > >> 0.27 : c057f878: 3d 20 de 00 lis r9,-8704 > >> 0.28 : c057f87c: 7d 3a c3 a6 mtspr 794,r9 > >> ... > >> > >> A perf profile shows that in total the 9 put_user() represent 36% of > >> the time spent in snd_pcm_ioctl() and about 80 instructions. > >> > >> With this patch everything is done in 13 instructions and represent > >> only 15% of the time spent in snd_pcm_ioctl(): > >> > >> 0.57 : c057f5dc: 3d 20 dc 00 lis r9,-9216 > >> 0.98 : c057f5e0: 7d 3a c3 a6 mtspr 794,r9 > >> 0.16 : c057f5e4: 92 7f 00 04 stw r19,4(r31) > >> 0.63 : c057f5e8: 93 df 00 0c stw r30,12(r31) > >> 0.16 : c057f5ec: 93 9f 00 10 stw r28,16(r31) > >> 4.95 : c057f5f0: 92 9f 00 14 stw r20,20(r31) > >> 0.19 : c057f5f4: 92 5f 00 18 stw r18,24(r31) > >> 0.49 : c057f5f8: 92 bf 00 1c stw r21,28(r31) > >> 0.27 : c057f5fc: 93 7f 00 20 stw r27,32(r31) > >> 5.88 : c057f600: 93 36 00 00 stw r25,0(r22) > >> 0.11 : c057f604: 93 17 00 00 stw r24,0(r23) > >> 0.00 : c057f608: 3d 20 de 00 lis r9,-8704 > >> 0.79 : c057f60c: 7d 3a c3 a6 mtspr 794,r9 > >> > >> Note that here the access_ok() in user_write_access_begin() is skipped > >> because the exact same verification has already been performed at the > >> beginning of the fonction with the call to user_read_access_begin(). > >> > >> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> > >> --- > >> This is a lighter version of previous patch "[PATCH v2] ALSA: pcm: Convert multiple {get/put}_user to user_access_begin/user_access_end()" focussing on identified hot path. > >> Moved and nested the failure labels closer in order to increase readability > > > > Thanks for the revised patch! > > > > Although it's now much lighter, I still believe that we can reduce > > get_user() / put_user() calls significantly by adjusting the struct > > usage. > > > > Could you check whether the patch below can improve? > > (Zero-ing of sstatus can be an overhead here, but there are some > > holes, and these need to be initialized before copying back...) > > > > Thanks for the proposed patch. Unfortunately it doesn't improve the > situation. The problem with copy_from_user() and copy_to_user() is > that they perform quite bad on small regions. And for the from_user > side we still get two user access enable/disable instead 3 and for the > to_user side we still get two as well (Allthough we had 7 > previously). Those 4 user access enable/disable still have a cost. > > Nowadays the tendency is really to go for the unsafe_put/get_user > style, see some significant exemples below. And as mentioned in those > commits, behind the performance improvement it also lead to much > cleaner code generation. > - 38ebcf5096a8 ("scm: optimize put_cmsg()") > - 9f79b78ef744 ("Convert filldir[64]() from __put_user() to > unsafe_put_user()") > - ef0ba0553829 ("poll: fix performance regression due to out-of-line > __put_user()") > > Commit 38ebcf5096a8 is explicit about copy_to_user() being bad for > small regions. > > Here below is some comparison between the three way of doing > snd_pcm_ioctl_sync_ptr_compat(), output is from 'perf top': > > Initially I got the following. That 12%+ is the reason why I started > investigating. > > 14.20% test_perf [.] engine_main > ==> 12.86% [kernel] [k] snd_pcm_ioctl > 11.91% [kernel] [k] finish_task_switch.isra.0 > 4.15% [kernel] [k] snd_pcm_group_unlock_irq.part.0 > 4.07% libc.so.6 [.] __ioctl_time64 > 3.58% libasound.so.2.0.0 [.] __snd_pcm_mmap_begin_generic > 3.37% [kernel] [k] sys_ioctl > 2.96% libasound.so.2.0.0 [.] snd_pcm_hw_avail_update > 2.73% libasound.so.2.0.0 [.] __snd_pcm_mmap_begin > 2.58% [kernel] [k] system_call_exception > 1.93% libasound.so.2.0.0 [.] sync_ptr1 > 1.85% libasound.so.2.0.0 [.] snd_pcm_unlock > 1.84% libasound.so.2.0.0 [.] snd_pcm_mmap_begin > 1.83% libasound.so.2.0.0 [.] bad_pcm_state > 1.68% libasound.so.2.0.0 [.] snd_pcm_mmap_avail > 1.67% libasound.so.2.0.0 [.] snd_pcm_avail_update > > With _your_ patch I get the following. copy_from_user() calls > _copy_from_user() and copy_to_user() calls _copy_to_user(). Both then > call __copy_tofrom_user(). In total it is 16.4% so it is worse than > before. > > 14.47% test_perf [.] engine_main > 12.00% [kernel] [k] finish_task_switch.isra.0 > ==> 8.37% [kernel] [k] snd_pcm_ioctl > 5.44% libc.so.6 [.] __ioctl_time64 > 5.03% [kernel] [k] snd_pcm_group_unlock_irq.part.0 > ==> 4.86% [kernel] [k] __copy_tofrom_user > 4.62% [kernel] [k] sys_ioctl > 3.22% [kernel] [k] system_call_exception > 2.42% libasound.so.2.0.0 [.] snd_pcm_mmap_begin > 2.31% [kernel] [k] fdget > 2.23% libasound.so.2.0.0 [.] __snd_pcm_mmap_begin_generic > 2.19% [kernel] [k] syscall_exit_prepare > 1.92% libasound.so.2.0.0 [.] snd_pcm_mmap_avail > 1.86% libasound.so.2.0.0 [.] __snd_pcm_mmap_begin > 1.68% libasound.so.2.0.0 [.] snd_pcm_hw_avail_update > ==> 1.67% [kernel] [k] _copy_from_user > 1.66% libasound.so.2.0.0 [.] bad_pcm_state > ==> 1.53% [kernel] [k] _copy_to_user > 1.40% libasound.so.2.0.0 [.] sync_ptr1 > > With my patch I get the following: > > 17.46% test_perf [.] engine_main > 9.14% [kernel] [k] finish_task_switch.isra.0 > ==> 4.92% [kernel] [k] snd_pcm_ioctl > 3.99% [kernel] [k] snd_pcm_group_unlock_irq.part.0 > 3.71% libc.so.6 [.] __ioctl_time64 > 3.61% libasound.so.2.0.0 [.] __snd_pcm_mmap_begin_generic > 2.72% libasound.so.2.0.0 [.] sync_ptr1 > 2.65% [kernel] [k] system_call_exception > 2.46% [kernel] [k] sys_ioctl > 2.43% [kernel] [k] __rseq_handle_notify_resume > 2.34% [kernel] [k] do_epoll_wait > 2.30% libasound.so.2.0.0 [.] __snd_pcm_mmap_commit > 2.14% libasound.so.2.0.0 [.] __snd_pcm_avail > 2.04% libasound.so.2.0.0 [.] snd_pcm_hw_avail_update > 1.89% libasound.so.2.0.0 [.] snd_pcm_lock > 1.84% libasound.so.2.0.0 [.] snd_pcm_mmap_avail > 1.76% libasound.so.2.0.0 [.] __snd_pcm_avail_update > 1.61% libasound.so.2.0.0 [.] bad_pcm_state > 1.60% libasound.so.2.0.0 [.] __snd_pcm_mmap_begin > 1.49% libasound.so.2.0.0 [.] query_status_data Thanks for the detailed analysis! Then unsafe_*_user() seems to be the way to go. I'll check your latest patches. Takashi
© 2016 - 2025 Red Hat, Inc.