[PATCH] ALSA: pcm: Convert snd_pcm_ioctl_sync_ptr_{compat/x32} to user_access_begin/user_access_end()

Christophe Leroy posted 1 patch 4 months ago
sound/core/pcm_compat.c | 40 +++++++++++++++++++++++++++-------------
sound/core/pcm_native.c | 40 +++++++++++++++++++++++++++-------------
2 files changed, 54 insertions(+), 26 deletions(-)
[PATCH] ALSA: pcm: Convert snd_pcm_ioctl_sync_ptr_{compat/x32} to user_access_begin/user_access_end()
Posted by Christophe Leroy 4 months ago
With user access protection (Called SMAP on x86 or KUAP on powerpc)
each and every call to get_user() or put_user() performs heavy
operations to unlock and lock kernel access to userspace.

SNDRV_PCM_IOCTL_SYNC_PTR ioctl is a hot path that needs to be
optimised. To do that, perform user accesses by blocks using
user_access_begin/user_access_end() and unsafe_get_user()/
unsafe_put_user() and alike.

Before the patch the 9 calls to put_user() at the
end of snd_pcm_ioctl_sync_ptr_compat() imply the following set of
instructions about 9 times (access_ok - enable user - write - disable
user):
    0.00 :   c057f858:       3d 20 7f ff     lis     r9,32767
    0.29 :   c057f85c:       39 5e 00 14     addi    r10,r30,20
    0.77 :   c057f860:       61 29 ff fc     ori     r9,r9,65532
    0.32 :   c057f864:       7c 0a 48 40     cmplw   r10,r9
    0.36 :   c057f868:       41 a1 fb 58     bgt     c057f3c0 <snd_pcm_ioctl+0xbb0>
    0.30 :   c057f86c:       3d 20 dc 00     lis     r9,-9216
    1.95 :   c057f870:       7d 3a c3 a6     mtspr   794,r9
    0.33 :   c057f874:       92 8a 00 00     stw     r20,0(r10)
    0.27 :   c057f878:       3d 20 de 00     lis     r9,-8704
    0.28 :   c057f87c:       7d 3a c3 a6     mtspr   794,r9
...

A perf profile shows that in total the 9 put_user() represent 36% of
the time spent in snd_pcm_ioctl() and about 80 instructions.

With this patch everything is done in 13 instructions and represent
only 15% of the time spent in snd_pcm_ioctl():

    0.57 :   c057f5dc:       3d 20 dc 00     lis     r9,-9216
    0.98 :   c057f5e0:       7d 3a c3 a6     mtspr   794,r9
    0.16 :   c057f5e4:       92 7f 00 04     stw     r19,4(r31)
    0.63 :   c057f5e8:       93 df 00 0c     stw     r30,12(r31)
    0.16 :   c057f5ec:       93 9f 00 10     stw     r28,16(r31)
    4.95 :   c057f5f0:       92 9f 00 14     stw     r20,20(r31)
    0.19 :   c057f5f4:       92 5f 00 18     stw     r18,24(r31)
    0.49 :   c057f5f8:       92 bf 00 1c     stw     r21,28(r31)
    0.27 :   c057f5fc:       93 7f 00 20     stw     r27,32(r31)
    5.88 :   c057f600:       93 36 00 00     stw     r25,0(r22)
    0.11 :   c057f604:       93 17 00 00     stw     r24,0(r23)
    0.00 :   c057f608:       3d 20 de 00     lis     r9,-8704
    0.79 :   c057f60c:       7d 3a c3 a6     mtspr   794,r9

Note that here the access_ok() in user_write_access_begin() is skipped
because the exact same verification has already been performed at the
beginning of the fonction with the call to user_read_access_begin().

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
This is a lighter version of previous patch "[PATCH v2] ALSA: pcm: Convert multiple {get/put}_user to user_access_begin/user_access_end()" focussing on identified hot path.
Moved and nested the failure labels closer in order to increase readability

Link: https://patchwork.ozlabs.org/project/linuxppc-dev/patch/d2609397eafc2b55ec1f44a3f30ccec00e0c7f6e.1749455639.git.christophe.leroy@csgroup.eu/

 sound/core/pcm_compat.c | 40 +++++++++++++++++++++++++++-------------
 sound/core/pcm_native.c | 40 +++++++++++++++++++++++++++-------------
 2 files changed, 54 insertions(+), 26 deletions(-)

diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c
index a42ec7f5a1da..348a72e6499e 100644
--- a/sound/core/pcm_compat.c
+++ b/sound/core/pcm_compat.c
@@ -418,10 +418,18 @@ static int snd_pcm_ioctl_sync_ptr_x32(struct snd_pcm_substream *substream,
 	if (snd_BUG_ON(!runtime))
 		return -EINVAL;
 
-	if (get_user(sflags, &src->flags) ||
-	    get_user(scontrol.appl_ptr, &src->c.control.appl_ptr) ||
-	    get_user(scontrol.avail_min, &src->c.control.avail_min))
+	if (!user_read_access_begin(src, sizeof(*src)))
 		return -EFAULT;
+	err = -EFAULT;
+	unsafe_get_user(sflags, &src->flags, Efault_rd);
+	unsafe_get_user(scontrol.appl_ptr, &src->c.control.appl_ptr, Efault_rd);
+	unsafe_get_user(scontrol.avail_min, &src->c.control.avail_min, Efault_rd);
+	err = 0;
+Efault_rd:
+	user_read_access_end();
+	if (err)
+		return err;
+
 	if (sflags & SNDRV_PCM_SYNC_PTR_HWSYNC) {
 		err = snd_pcm_hwsync(substream);
 		if (err < 0)
@@ -450,18 +458,24 @@ static int snd_pcm_ioctl_sync_ptr_x32(struct snd_pcm_substream *substream,
 	}
 	if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL))
 		snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_DEVICE);
-	if (put_user(sstatus.state, &src->s.status.state) ||
-	    put_user(sstatus.hw_ptr, &src->s.status.hw_ptr) ||
-	    put_user(sstatus.tstamp.tv_sec, &src->s.status.tstamp_sec) ||
-	    put_user(sstatus.tstamp.tv_nsec, &src->s.status.tstamp_nsec) ||
-	    put_user(sstatus.suspended_state, &src->s.status.suspended_state) ||
-	    put_user(sstatus.audio_tstamp.tv_sec, &src->s.status.audio_tstamp_sec) ||
-	    put_user(sstatus.audio_tstamp.tv_nsec, &src->s.status.audio_tstamp_nsec) ||
-	    put_user(scontrol.appl_ptr, &src->c.control.appl_ptr) ||
-	    put_user(scontrol.avail_min, &src->c.control.avail_min))
+
+	if (!user_write_access_begin(src, sizeof(*src)))
 		return -EFAULT;
+	err = -EFAULT;
+	unsafe_put_user(sstatus.state, &src->s.status.state, Efault_wr);
+	unsafe_put_user(sstatus.hw_ptr, &src->s.status.hw_ptr, Efault_wr);
+	unsafe_put_user(sstatus.tstamp.tv_sec, &src->s.status.tstamp_sec, Efault_wr);
+	unsafe_put_user(sstatus.tstamp.tv_nsec, &src->s.status.tstamp_nsec, Efault_wr);
+	unsafe_put_user(sstatus.suspended_state, &src->s.status.suspended_state, Efault_wr);
+	unsafe_put_user(sstatus.audio_tstamp.tv_sec, &src->s.status.audio_tstamp_sec, Efault_wr);
+	unsafe_put_user(sstatus.audio_tstamp.tv_nsec, &src->s.status.audio_tstamp_nsec, Efault_wr);
+	unsafe_put_user(scontrol.appl_ptr, &src->c.control.appl_ptr, Efault_wr);
+	unsafe_put_user(scontrol.avail_min, &src->c.control.avail_min), Efault_wr);
+	err = 0;
+Efault_wr:
+	user_write_access_end();
 
-	return 0;
+	return err;
 }
 #endif /* CONFIG_X86_X32_ABI */
 
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index ecb71bf1859d..b104faddb6e3 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -3165,10 +3165,18 @@ static int snd_pcm_ioctl_sync_ptr_compat(struct snd_pcm_substream *substream,
 	if (snd_BUG_ON(!runtime))
 		return -EINVAL;
 
-	if (get_user(sflags, &src->flags) ||
-	    get_user(scontrol.appl_ptr, &src->c.control.appl_ptr) ||
-	    get_user(scontrol.avail_min, &src->c.control.avail_min))
+	if (!user_read_access_begin(src, sizeof(*src)))
 		return -EFAULT;
+	err = -EFAULT;
+	unsafe_get_user(sflags, &src->flags, Efault_rd);
+	unsafe_get_user(scontrol.appl_ptr, &src->c.control.appl_ptr, Efault_rd);
+	unsafe_get_user(scontrol.avail_min, &src->c.control.avail_min, Efault_rd);
+	err = 0;
+Efault_rd:
+	user_read_access_end();
+	if (err)
+		return err;
+
 	if (sflags & SNDRV_PCM_SYNC_PTR_HWSYNC) {
 		err = snd_pcm_hwsync(substream);
 		if (err < 0)
@@ -3200,18 +3208,24 @@ static int snd_pcm_ioctl_sync_ptr_compat(struct snd_pcm_substream *substream,
 	}
 	if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL))
 		snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_DEVICE);
-	if (put_user(sstatus.state, &src->s.status.state) ||
-	    put_user(sstatus.hw_ptr, &src->s.status.hw_ptr) ||
-	    put_user(sstatus.tstamp.tv_sec, &src->s.status.tstamp_sec) ||
-	    put_user(sstatus.tstamp.tv_nsec, &src->s.status.tstamp_nsec) ||
-	    put_user(sstatus.suspended_state, &src->s.status.suspended_state) ||
-	    put_user(sstatus.audio_tstamp.tv_sec, &src->s.status.audio_tstamp_sec) ||
-	    put_user(sstatus.audio_tstamp.tv_nsec, &src->s.status.audio_tstamp_nsec) ||
-	    put_user(scontrol.appl_ptr, &src->c.control.appl_ptr) ||
-	    put_user(scontrol.avail_min, &src->c.control.avail_min))
+
+	if (!user_write_access_begin(src, sizeof(*src)))
 		return -EFAULT;
+	err = -EFAULT;
+	unsafe_put_user(sstatus.state, &src->s.status.state, Efault_wr);
+	unsafe_put_user(sstatus.hw_ptr, &src->s.status.hw_ptr, Efault_wr);
+	unsafe_put_user(sstatus.tstamp.tv_sec, &src->s.status.tstamp_sec, Efault_wr);
+	unsafe_put_user(sstatus.tstamp.tv_nsec, &src->s.status.tstamp_nsec, Efault_wr);
+	unsafe_put_user(sstatus.suspended_state, &src->s.status.suspended_state, Efault_wr);
+	unsafe_put_user(sstatus.audio_tstamp.tv_sec, &src->s.status.audio_tstamp_sec, Efault_wr);
+	unsafe_put_user(sstatus.audio_tstamp.tv_nsec, &src->s.status.audio_tstamp_nsec, Efault_wr);
+	unsafe_put_user(scontrol.appl_ptr, &src->c.control.appl_ptr, Efault_wr);
+	unsafe_put_user(scontrol.avail_min, &src->c.control.avail_min, Efault_wr);
+	err = 0;
+Efault_wr:
+	user_write_access_end();
 
-	return 0;
+	return err;
 }
 #define __SNDRV_PCM_IOCTL_SYNC_PTR32 _IOWR('A', 0x23, struct snd_pcm_sync_ptr32)
 
-- 
2.47.0
Re: [PATCH] ALSA: pcm: Convert snd_pcm_ioctl_sync_ptr_{compat/x32} to user_access_begin/user_access_end()
Posted by kernel test robot 3 months, 3 weeks ago
Hi Christophe,

kernel test robot noticed the following build errors:

[auto build test ERROR on tiwai-sound/for-next]
[also build test ERROR on tiwai-sound/for-linus linus/master v6.16-rc1 next-20250613]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Christophe-Leroy/ALSA-pcm-Convert-snd_pcm_ioctl_sync_ptr_-compat-x32-to-user_access_begin-user_access_end/20250612-185240
base:   https://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound.git for-next
patch link:    https://lore.kernel.org/r/8df11af98033e4cb4d9b0f16d6e9d5b69110b036.1749724057.git.christophe.leroy%40csgroup.eu
patch subject: [PATCH] ALSA: pcm: Convert snd_pcm_ioctl_sync_ptr_{compat/x32} to user_access_begin/user_access_end()
config: x86_64-randconfig-008-20250615 (https://download.01.org/0day-ci/archive/20250615/202506151632.jvLtNHPb-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250615/202506151632.jvLtNHPb-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202506151632.jvLtNHPb-lkp@intel.com/

All errors (new ones prefixed by >>):

   In file included from sound/core/pcm_native.c:4033:
   sound/core/pcm_compat.c: In function 'snd_pcm_ioctl_sync_ptr_x32':
>> sound/core/pcm_compat.c:473:70: error: macro "unsafe_put_user" requires 3 arguments, but only 2 given
     473 |         unsafe_put_user(scontrol.avail_min, &src->c.control.avail_min), Efault_wr);
         |                                                                      ^
   In file included from include/linux/uaccess.h:12,
                    from include/linux/sched/task.h:13,
                    from include/linux/sched/signal.h:9,
                    from include/linux/rcuwait.h:6,
                    from include/linux/percpu-rwsem.h:7,
                    from include/linux/fs.h:34,
                    from include/linux/compat.h:17,
                    from sound/core/pcm_native.c:7:
   arch/x86/include/asm/uaccess.h:532: note: macro "unsafe_put_user" defined here
     532 | #define unsafe_put_user(x, ptr, label)  \
         | 
>> sound/core/pcm_compat.c:473:9: error: 'unsafe_put_user' undeclared (first use in this function)
     473 |         unsafe_put_user(scontrol.avail_min, &src->c.control.avail_min), Efault_wr);
         |         ^~~~~~~~~~~~~~~
   sound/core/pcm_compat.c:473:9: note: each undeclared identifier is reported only once for each function it appears in
>> sound/core/pcm_compat.c:473:73: error: 'Efault_wr' undeclared (first use in this function)
     473 |         unsafe_put_user(scontrol.avail_min, &src->c.control.avail_min), Efault_wr);
         |                                                                         ^~~~~~~~~
   sound/core/pcm_compat.c:473:71: warning: left-hand operand of comma expression has no effect [-Wunused-value]
     473 |         unsafe_put_user(scontrol.avail_min, &src->c.control.avail_min), Efault_wr);
         |                                                                       ^
>> sound/core/pcm_compat.c:473:82: error: expected ';' before ')' token
     473 |         unsafe_put_user(scontrol.avail_min, &src->c.control.avail_min), Efault_wr);
         |                                                                                  ^
         |                                                                                  ;
>> sound/core/pcm_compat.c:473:82: error: expected statement before ')' token


vim +/unsafe_put_user +473 sound/core/pcm_compat.c

   405	
   406	static int snd_pcm_ioctl_sync_ptr_x32(struct snd_pcm_substream *substream,
   407					      struct snd_pcm_sync_ptr_x32 __user *src)
   408	{
   409		struct snd_pcm_runtime *runtime = substream->runtime;
   410		volatile struct snd_pcm_mmap_status *status;
   411		volatile struct snd_pcm_mmap_control *control;
   412		u32 sflags;
   413		struct snd_pcm_mmap_control scontrol;
   414		struct snd_pcm_mmap_status sstatus;
   415		snd_pcm_uframes_t boundary;
   416		int err;
   417	
   418		if (snd_BUG_ON(!runtime))
   419			return -EINVAL;
   420	
   421		if (!user_read_access_begin(src, sizeof(*src)))
   422			return -EFAULT;
   423		err = -EFAULT;
   424		unsafe_get_user(sflags, &src->flags, Efault_rd);
   425		unsafe_get_user(scontrol.appl_ptr, &src->c.control.appl_ptr, Efault_rd);
   426		unsafe_get_user(scontrol.avail_min, &src->c.control.avail_min, Efault_rd);
   427		err = 0;
   428	Efault_rd:
   429		user_read_access_end();
   430		if (err)
   431			return err;
   432	
   433		if (sflags & SNDRV_PCM_SYNC_PTR_HWSYNC) {
   434			err = snd_pcm_hwsync(substream);
   435			if (err < 0)
   436				return err;
   437		}
   438		status = runtime->status;
   439		control = runtime->control;
   440		boundary = recalculate_boundary(runtime);
   441		if (!boundary)
   442			boundary = 0x7fffffff;
   443		scoped_guard(pcm_stream_lock_irq, substream) {
   444			/* FIXME: we should consider the boundary for the sync from app */
   445			if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL))
   446				control->appl_ptr = scontrol.appl_ptr;
   447			else
   448				scontrol.appl_ptr = control->appl_ptr % boundary;
   449			if (!(sflags & SNDRV_PCM_SYNC_PTR_AVAIL_MIN))
   450				control->avail_min = scontrol.avail_min;
   451			else
   452				scontrol.avail_min = control->avail_min;
   453			sstatus.state = status->state;
   454			sstatus.hw_ptr = status->hw_ptr % boundary;
   455			sstatus.tstamp = status->tstamp;
   456			sstatus.suspended_state = status->suspended_state;
   457			sstatus.audio_tstamp = status->audio_tstamp;
   458		}
   459		if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL))
   460			snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_DEVICE);
   461	
   462		if (!user_write_access_begin(src, sizeof(*src)))
   463			return -EFAULT;
   464		err = -EFAULT;
   465		unsafe_put_user(sstatus.state, &src->s.status.state, Efault_wr);
   466		unsafe_put_user(sstatus.hw_ptr, &src->s.status.hw_ptr, Efault_wr);
   467		unsafe_put_user(sstatus.tstamp.tv_sec, &src->s.status.tstamp_sec, Efault_wr);
   468		unsafe_put_user(sstatus.tstamp.tv_nsec, &src->s.status.tstamp_nsec, Efault_wr);
   469		unsafe_put_user(sstatus.suspended_state, &src->s.status.suspended_state, Efault_wr);
   470		unsafe_put_user(sstatus.audio_tstamp.tv_sec, &src->s.status.audio_tstamp_sec, Efault_wr);
   471		unsafe_put_user(sstatus.audio_tstamp.tv_nsec, &src->s.status.audio_tstamp_nsec, Efault_wr);
   472		unsafe_put_user(scontrol.appl_ptr, &src->c.control.appl_ptr, Efault_wr);
 > 473		unsafe_put_user(scontrol.avail_min, &src->c.control.avail_min), Efault_wr);
   474		err = 0;
   475	Efault_wr:
   476		user_write_access_end();
   477	
   478		return err;
   479	}
   480	#endif /* CONFIG_X86_X32_ABI */
   481	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCH] ALSA: pcm: Convert snd_pcm_ioctl_sync_ptr_{compat/x32} to user_access_begin/user_access_end()
Posted by Takashi Iwai 4 months ago
On Thu, 12 Jun 2025 12:39:39 +0200,
Christophe Leroy wrote:
> 
> With user access protection (Called SMAP on x86 or KUAP on powerpc)
> each and every call to get_user() or put_user() performs heavy
> operations to unlock and lock kernel access to userspace.
> 
> SNDRV_PCM_IOCTL_SYNC_PTR ioctl is a hot path that needs to be
> optimised. To do that, perform user accesses by blocks using
> user_access_begin/user_access_end() and unsafe_get_user()/
> unsafe_put_user() and alike.
> 
> Before the patch the 9 calls to put_user() at the
> end of snd_pcm_ioctl_sync_ptr_compat() imply the following set of
> instructions about 9 times (access_ok - enable user - write - disable
> user):
>     0.00 :   c057f858:       3d 20 7f ff     lis     r9,32767
>     0.29 :   c057f85c:       39 5e 00 14     addi    r10,r30,20
>     0.77 :   c057f860:       61 29 ff fc     ori     r9,r9,65532
>     0.32 :   c057f864:       7c 0a 48 40     cmplw   r10,r9
>     0.36 :   c057f868:       41 a1 fb 58     bgt     c057f3c0 <snd_pcm_ioctl+0xbb0>
>     0.30 :   c057f86c:       3d 20 dc 00     lis     r9,-9216
>     1.95 :   c057f870:       7d 3a c3 a6     mtspr   794,r9
>     0.33 :   c057f874:       92 8a 00 00     stw     r20,0(r10)
>     0.27 :   c057f878:       3d 20 de 00     lis     r9,-8704
>     0.28 :   c057f87c:       7d 3a c3 a6     mtspr   794,r9
> ...
> 
> A perf profile shows that in total the 9 put_user() represent 36% of
> the time spent in snd_pcm_ioctl() and about 80 instructions.
> 
> With this patch everything is done in 13 instructions and represent
> only 15% of the time spent in snd_pcm_ioctl():
> 
>     0.57 :   c057f5dc:       3d 20 dc 00     lis     r9,-9216
>     0.98 :   c057f5e0:       7d 3a c3 a6     mtspr   794,r9
>     0.16 :   c057f5e4:       92 7f 00 04     stw     r19,4(r31)
>     0.63 :   c057f5e8:       93 df 00 0c     stw     r30,12(r31)
>     0.16 :   c057f5ec:       93 9f 00 10     stw     r28,16(r31)
>     4.95 :   c057f5f0:       92 9f 00 14     stw     r20,20(r31)
>     0.19 :   c057f5f4:       92 5f 00 18     stw     r18,24(r31)
>     0.49 :   c057f5f8:       92 bf 00 1c     stw     r21,28(r31)
>     0.27 :   c057f5fc:       93 7f 00 20     stw     r27,32(r31)
>     5.88 :   c057f600:       93 36 00 00     stw     r25,0(r22)
>     0.11 :   c057f604:       93 17 00 00     stw     r24,0(r23)
>     0.00 :   c057f608:       3d 20 de 00     lis     r9,-8704
>     0.79 :   c057f60c:       7d 3a c3 a6     mtspr   794,r9
> 
> Note that here the access_ok() in user_write_access_begin() is skipped
> because the exact same verification has already been performed at the
> beginning of the fonction with the call to user_read_access_begin().
> 
> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
> ---
> This is a lighter version of previous patch "[PATCH v2] ALSA: pcm: Convert multiple {get/put}_user to user_access_begin/user_access_end()" focussing on identified hot path.
> Moved and nested the failure labels closer in order to increase readability

Thanks for the revised patch!

Although it's now much lighter, I still believe that we can reduce
get_user() / put_user() calls significantly by adjusting the struct
usage.

Could you check whether the patch below can improve?
(Zero-ing of sstatus can be an overhead here, but there are some
holes, and these need to be initialized before copying back...)


Takashi

--- a/sound/core/pcm_compat.c
+++ b/sound/core/pcm_compat.c
@@ -410,8 +410,8 @@ static int snd_pcm_ioctl_sync_ptr_x32(struct snd_pcm_substream *substream,
 	volatile struct snd_pcm_mmap_status *status;
 	volatile struct snd_pcm_mmap_control *control;
 	u32 sflags;
-	struct snd_pcm_mmap_control scontrol;
-	struct snd_pcm_mmap_status sstatus;
+	struct snd_pcm_mmap_control_x32 scontrol;
+	struct snd_pcm_mmap_status_x32 sstatus = {};
 	snd_pcm_uframes_t boundary;
 	int err;
 
@@ -419,8 +419,7 @@ static int snd_pcm_ioctl_sync_ptr_x32(struct snd_pcm_substream *substream,
 		return -EINVAL;
 
 	if (get_user(sflags, &src->flags) ||
-	    get_user(scontrol.appl_ptr, &src->c.control.appl_ptr) ||
-	    get_user(scontrol.avail_min, &src->c.control.avail_min))
+	    copy_from_user(&scontrol, &src->c.control))
 		return -EFAULT;
 	if (sflags & SNDRV_PCM_SYNC_PTR_HWSYNC) {
 		err = snd_pcm_hwsync(substream);
@@ -444,21 +443,16 @@ static int snd_pcm_ioctl_sync_ptr_x32(struct snd_pcm_substream *substream,
 			scontrol.avail_min = control->avail_min;
 		sstatus.state = status->state;
 		sstatus.hw_ptr = status->hw_ptr % boundary;
-		sstatus.tstamp = status->tstamp;
+		sstatus.tstamp_sec = status->tstamp.tv_sec;
+		sstatus.tstamp_nsec = status->tstamp.tv_nsec;
 		sstatus.suspended_state = status->suspended_state;
-		sstatus.audio_tstamp = status->audio_tstamp;
+		sstatus.audio_tstamp_sec = status->audio_tstamp.tv_sec;
+		sstatus.audio_tstamp_nsec = status->audio_tstamp.tv_nsec;
 	}
 	if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL))
 		snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_DEVICE);
-	if (put_user(sstatus.state, &src->s.status.state) ||
-	    put_user(sstatus.hw_ptr, &src->s.status.hw_ptr) ||
-	    put_user(sstatus.tstamp.tv_sec, &src->s.status.tstamp_sec) ||
-	    put_user(sstatus.tstamp.tv_nsec, &src->s.status.tstamp_nsec) ||
-	    put_user(sstatus.suspended_state, &src->s.status.suspended_state) ||
-	    put_user(sstatus.audio_tstamp.tv_sec, &src->s.status.audio_tstamp_sec) ||
-	    put_user(sstatus.audio_tstamp.tv_nsec, &src->s.status.audio_tstamp_nsec) ||
-	    put_user(scontrol.appl_ptr, &src->c.control.appl_ptr) ||
-	    put_user(scontrol.avail_min, &src->c.control.avail_min))
+	if (copy_to_user(&src->s.status, &sstatus, sizeof(sstatus)) ||
+	    copy_to_user(&src->c.control, &scontrol, sizeof(scontrol)))
 		return -EFAULT;
 
 	return 0;
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -3157,8 +3157,8 @@ static int snd_pcm_ioctl_sync_ptr_compat(struct snd_pcm_substream *substream,
 	volatile struct snd_pcm_mmap_status *status;
 	volatile struct snd_pcm_mmap_control *control;
 	u32 sflags;
-	struct snd_pcm_mmap_control scontrol;
-	struct snd_pcm_mmap_status sstatus;
+	struct snd_pcm_mmap_control32 scontrol;
+	struct snd_pcm_mmap_status32 sstatus = {};
 	snd_pcm_uframes_t boundary;
 	int err;
 
@@ -3166,8 +3166,7 @@ static int snd_pcm_ioctl_sync_ptr_compat(struct snd_pcm_substream *substream,
 		return -EINVAL;
 
 	if (get_user(sflags, &src->flags) ||
-	    get_user(scontrol.appl_ptr, &src->c.control.appl_ptr) ||
-	    get_user(scontrol.avail_min, &src->c.control.avail_min))
+	    copy_from_user(&scontrol, &src->c.control, sizeof(scontrol)))
 		return -EFAULT;
 	if (sflags & SNDRV_PCM_SYNC_PTR_HWSYNC) {
 		err = snd_pcm_hwsync(substream);
@@ -3194,21 +3193,16 @@ static int snd_pcm_ioctl_sync_ptr_compat(struct snd_pcm_substream *substream,
 			scontrol.avail_min = control->avail_min;
 		sstatus.state = status->state;
 		sstatus.hw_ptr = status->hw_ptr % boundary;
-		sstatus.tstamp = status->tstamp;
+		sstatus.tstamp_sec = status->tstamp.tv_sec;
+		sstatus.tstamp_nsec = status->tstamp.tv_nsec;
 		sstatus.suspended_state = status->suspended_state;
-		sstatus.audio_tstamp = status->audio_tstamp;
+		sstatus.audio_tstamp_sec = status->audio_tstamp.tv_sec;
+		sstatus.audio_tstamp_nsec = status->audio_tstamp.tv_nsec;
 	}
 	if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL))
 		snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_DEVICE);
-	if (put_user(sstatus.state, &src->s.status.state) ||
-	    put_user(sstatus.hw_ptr, &src->s.status.hw_ptr) ||
-	    put_user(sstatus.tstamp.tv_sec, &src->s.status.tstamp_sec) ||
-	    put_user(sstatus.tstamp.tv_nsec, &src->s.status.tstamp_nsec) ||
-	    put_user(sstatus.suspended_state, &src->s.status.suspended_state) ||
-	    put_user(sstatus.audio_tstamp.tv_sec, &src->s.status.audio_tstamp_sec) ||
-	    put_user(sstatus.audio_tstamp.tv_nsec, &src->s.status.audio_tstamp_nsec) ||
-	    put_user(scontrol.appl_ptr, &src->c.control.appl_ptr) ||
-	    put_user(scontrol.avail_min, &src->c.control.avail_min))
+	if (copy_to_user(&src->s.status, &sstatus, sizeof(sstatus)) ||
+	    copy_to_user(&src->c.control, &scontrol, sizeof(scontrol)))
 		return -EFAULT;
 
 	return 0;
Re: [PATCH] ALSA: pcm: Convert snd_pcm_ioctl_sync_ptr_{compat/x32} to user_access_begin/user_access_end()
Posted by Christophe Leroy 3 months, 4 weeks ago

Le 12/06/2025 à 13:02, Takashi Iwai a écrit :
> On Thu, 12 Jun 2025 12:39:39 +0200,
> Christophe Leroy wrote:
>>
>> With user access protection (Called SMAP on x86 or KUAP on powerpc)
>> each and every call to get_user() or put_user() performs heavy
>> operations to unlock and lock kernel access to userspace.
>>
>> SNDRV_PCM_IOCTL_SYNC_PTR ioctl is a hot path that needs to be
>> optimised. To do that, perform user accesses by blocks using
>> user_access_begin/user_access_end() and unsafe_get_user()/
>> unsafe_put_user() and alike.
>>
>> Before the patch the 9 calls to put_user() at the
>> end of snd_pcm_ioctl_sync_ptr_compat() imply the following set of
>> instructions about 9 times (access_ok - enable user - write - disable
>> user):
>>      0.00 :   c057f858:       3d 20 7f ff     lis     r9,32767
>>      0.29 :   c057f85c:       39 5e 00 14     addi    r10,r30,20
>>      0.77 :   c057f860:       61 29 ff fc     ori     r9,r9,65532
>>      0.32 :   c057f864:       7c 0a 48 40     cmplw   r10,r9
>>      0.36 :   c057f868:       41 a1 fb 58     bgt     c057f3c0 <snd_pcm_ioctl+0xbb0>
>>      0.30 :   c057f86c:       3d 20 dc 00     lis     r9,-9216
>>      1.95 :   c057f870:       7d 3a c3 a6     mtspr   794,r9
>>      0.33 :   c057f874:       92 8a 00 00     stw     r20,0(r10)
>>      0.27 :   c057f878:       3d 20 de 00     lis     r9,-8704
>>      0.28 :   c057f87c:       7d 3a c3 a6     mtspr   794,r9
>> ...
>>
>> A perf profile shows that in total the 9 put_user() represent 36% of
>> the time spent in snd_pcm_ioctl() and about 80 instructions.
>>
>> With this patch everything is done in 13 instructions and represent
>> only 15% of the time spent in snd_pcm_ioctl():
>>
>>      0.57 :   c057f5dc:       3d 20 dc 00     lis     r9,-9216
>>      0.98 :   c057f5e0:       7d 3a c3 a6     mtspr   794,r9
>>      0.16 :   c057f5e4:       92 7f 00 04     stw     r19,4(r31)
>>      0.63 :   c057f5e8:       93 df 00 0c     stw     r30,12(r31)
>>      0.16 :   c057f5ec:       93 9f 00 10     stw     r28,16(r31)
>>      4.95 :   c057f5f0:       92 9f 00 14     stw     r20,20(r31)
>>      0.19 :   c057f5f4:       92 5f 00 18     stw     r18,24(r31)
>>      0.49 :   c057f5f8:       92 bf 00 1c     stw     r21,28(r31)
>>      0.27 :   c057f5fc:       93 7f 00 20     stw     r27,32(r31)
>>      5.88 :   c057f600:       93 36 00 00     stw     r25,0(r22)
>>      0.11 :   c057f604:       93 17 00 00     stw     r24,0(r23)
>>      0.00 :   c057f608:       3d 20 de 00     lis     r9,-8704
>>      0.79 :   c057f60c:       7d 3a c3 a6     mtspr   794,r9
>>
>> Note that here the access_ok() in user_write_access_begin() is skipped
>> because the exact same verification has already been performed at the
>> beginning of the fonction with the call to user_read_access_begin().
>>
>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>> ---
>> This is a lighter version of previous patch "[PATCH v2] ALSA: pcm: Convert multiple {get/put}_user to user_access_begin/user_access_end()" focussing on identified hot path.
>> Moved and nested the failure labels closer in order to increase readability
> 
> Thanks for the revised patch!
> 
> Although it's now much lighter, I still believe that we can reduce
> get_user() / put_user() calls significantly by adjusting the struct
> usage.
> 
> Could you check whether the patch below can improve?
> (Zero-ing of sstatus can be an overhead here, but there are some
> holes, and these need to be initialized before copying back...)
> 

Thanks for the proposed patch. Unfortunately it doesn't improve the 
situation. The problem with copy_from_user() and copy_to_user() is that 
they perform quite bad on small regions. And for the from_user side we 
still get two user access enable/disable instead 3 and for the to_user 
side we still get two as well (Allthough we had 7 previously). Those 4 
user access enable/disable still have a cost.

Nowadays the tendency is really to go for the unsafe_put/get_user style, 
see some significant exemples below. And as mentioned in those commits, 
behind the performance improvement it also lead to much cleaner code 
generation.
- 38ebcf5096a8 ("scm: optimize put_cmsg()")
- 9f79b78ef744 ("Convert filldir[64]() from __put_user() to 
unsafe_put_user()")
- ef0ba0553829 ("poll: fix performance regression due to out-of-line 
__put_user()")

Commit 38ebcf5096a8 is explicit about copy_to_user() being bad for small 
regions.

Here below is some comparison between the three way of doing 
snd_pcm_ioctl_sync_ptr_compat(), output is from 'perf top':

Initially I got the following. That 12%+ is the reason why I started 
investigating.

     14.20%  test_perf           [.] engine_main
==> 12.86%  [kernel]            [k] snd_pcm_ioctl
     11.91%  [kernel]            [k] finish_task_switch.isra.0
      4.15%  [kernel]            [k] snd_pcm_group_unlock_irq.part.0
      4.07%  libc.so.6           [.] __ioctl_time64
      3.58%  libasound.so.2.0.0  [.] __snd_pcm_mmap_begin_generic
      3.37%  [kernel]            [k] sys_ioctl
      2.96%  libasound.so.2.0.0  [.] snd_pcm_hw_avail_update
      2.73%  libasound.so.2.0.0  [.] __snd_pcm_mmap_begin
      2.58%  [kernel]            [k] system_call_exception
      1.93%  libasound.so.2.0.0  [.] sync_ptr1
      1.85%  libasound.so.2.0.0  [.] snd_pcm_unlock
      1.84%  libasound.so.2.0.0  [.] snd_pcm_mmap_begin
      1.83%  libasound.so.2.0.0  [.] bad_pcm_state
      1.68%  libasound.so.2.0.0  [.] snd_pcm_mmap_avail
      1.67%  libasound.so.2.0.0  [.] snd_pcm_avail_update

With _your_ patch I get the following. copy_from_user() calls 
_copy_from_user() and copy_to_user() calls _copy_to_user(). Both then 
call __copy_tofrom_user(). In total it is 16.4% so it is worse than before.

     14.47%  test_perf           [.] engine_main
     12.00%  [kernel]            [k] finish_task_switch.isra.0
==>  8.37%  [kernel]            [k] snd_pcm_ioctl
      5.44%  libc.so.6           [.] __ioctl_time64
      5.03%  [kernel]            [k] snd_pcm_group_unlock_irq.part.0
==>  4.86%  [kernel]            [k] __copy_tofrom_user
      4.62%  [kernel]            [k] sys_ioctl
      3.22%  [kernel]            [k] system_call_exception
      2.42%  libasound.so.2.0.0  [.] snd_pcm_mmap_begin
      2.31%  [kernel]            [k] fdget
      2.23%  libasound.so.2.0.0  [.] __snd_pcm_mmap_begin_generic
      2.19%  [kernel]            [k] syscall_exit_prepare
      1.92%  libasound.so.2.0.0  [.] snd_pcm_mmap_avail
      1.86%  libasound.so.2.0.0  [.] __snd_pcm_mmap_begin
      1.68%  libasound.so.2.0.0  [.] snd_pcm_hw_avail_update
==>  1.67%  [kernel]            [k] _copy_from_user
      1.66%  libasound.so.2.0.0  [.] bad_pcm_state
==>  1.53%  [kernel]            [k] _copy_to_user
      1.40%  libasound.so.2.0.0  [.] sync_ptr1

With my patch I get the following:

     17.46%  test_perf           [.] engine_main
      9.14%  [kernel]            [k] finish_task_switch.isra.0
==>  4.92%  [kernel]            [k] snd_pcm_ioctl
      3.99%  [kernel]            [k] snd_pcm_group_unlock_irq.part.0
      3.71%  libc.so.6           [.] __ioctl_time64
      3.61%  libasound.so.2.0.0  [.] __snd_pcm_mmap_begin_generic
      2.72%  libasound.so.2.0.0  [.] sync_ptr1
      2.65%  [kernel]            [k] system_call_exception
      2.46%  [kernel]            [k] sys_ioctl
      2.43%  [kernel]            [k] __rseq_handle_notify_resume
      2.34%  [kernel]            [k] do_epoll_wait
      2.30%  libasound.so.2.0.0  [.] __snd_pcm_mmap_commit
      2.14%  libasound.so.2.0.0  [.] __snd_pcm_avail
      2.04%  libasound.so.2.0.0  [.] snd_pcm_hw_avail_update
      1.89%  libasound.so.2.0.0  [.] snd_pcm_lock
      1.84%  libasound.so.2.0.0  [.] snd_pcm_mmap_avail
      1.76%  libasound.so.2.0.0  [.] __snd_pcm_avail_update
      1.61%  libasound.so.2.0.0  [.] bad_pcm_state
      1.60%  libasound.so.2.0.0  [.] __snd_pcm_mmap_begin
      1.49%  libasound.so.2.0.0  [.] query_status_data

Christophe

Re: [PATCH] ALSA: pcm: Convert snd_pcm_ioctl_sync_ptr_{compat/x32} to user_access_begin/user_access_end()
Posted by Takashi Iwai 3 months, 4 weeks ago
On Fri, 13 Jun 2025 07:24:46 +0200,
Christophe Leroy wrote:
> 
> 
> 
> Le 12/06/2025 à 13:02, Takashi Iwai a écrit :
> > On Thu, 12 Jun 2025 12:39:39 +0200,
> > Christophe Leroy wrote:
> >> 
> >> With user access protection (Called SMAP on x86 or KUAP on powerpc)
> >> each and every call to get_user() or put_user() performs heavy
> >> operations to unlock and lock kernel access to userspace.
> >> 
> >> SNDRV_PCM_IOCTL_SYNC_PTR ioctl is a hot path that needs to be
> >> optimised. To do that, perform user accesses by blocks using
> >> user_access_begin/user_access_end() and unsafe_get_user()/
> >> unsafe_put_user() and alike.
> >> 
> >> Before the patch the 9 calls to put_user() at the
> >> end of snd_pcm_ioctl_sync_ptr_compat() imply the following set of
> >> instructions about 9 times (access_ok - enable user - write - disable
> >> user):
> >>      0.00 :   c057f858:       3d 20 7f ff     lis     r9,32767
> >>      0.29 :   c057f85c:       39 5e 00 14     addi    r10,r30,20
> >>      0.77 :   c057f860:       61 29 ff fc     ori     r9,r9,65532
> >>      0.32 :   c057f864:       7c 0a 48 40     cmplw   r10,r9
> >>      0.36 :   c057f868:       41 a1 fb 58     bgt     c057f3c0 <snd_pcm_ioctl+0xbb0>
> >>      0.30 :   c057f86c:       3d 20 dc 00     lis     r9,-9216
> >>      1.95 :   c057f870:       7d 3a c3 a6     mtspr   794,r9
> >>      0.33 :   c057f874:       92 8a 00 00     stw     r20,0(r10)
> >>      0.27 :   c057f878:       3d 20 de 00     lis     r9,-8704
> >>      0.28 :   c057f87c:       7d 3a c3 a6     mtspr   794,r9
> >> ...
> >> 
> >> A perf profile shows that in total the 9 put_user() represent 36% of
> >> the time spent in snd_pcm_ioctl() and about 80 instructions.
> >> 
> >> With this patch everything is done in 13 instructions and represent
> >> only 15% of the time spent in snd_pcm_ioctl():
> >> 
> >>      0.57 :   c057f5dc:       3d 20 dc 00     lis     r9,-9216
> >>      0.98 :   c057f5e0:       7d 3a c3 a6     mtspr   794,r9
> >>      0.16 :   c057f5e4:       92 7f 00 04     stw     r19,4(r31)
> >>      0.63 :   c057f5e8:       93 df 00 0c     stw     r30,12(r31)
> >>      0.16 :   c057f5ec:       93 9f 00 10     stw     r28,16(r31)
> >>      4.95 :   c057f5f0:       92 9f 00 14     stw     r20,20(r31)
> >>      0.19 :   c057f5f4:       92 5f 00 18     stw     r18,24(r31)
> >>      0.49 :   c057f5f8:       92 bf 00 1c     stw     r21,28(r31)
> >>      0.27 :   c057f5fc:       93 7f 00 20     stw     r27,32(r31)
> >>      5.88 :   c057f600:       93 36 00 00     stw     r25,0(r22)
> >>      0.11 :   c057f604:       93 17 00 00     stw     r24,0(r23)
> >>      0.00 :   c057f608:       3d 20 de 00     lis     r9,-8704
> >>      0.79 :   c057f60c:       7d 3a c3 a6     mtspr   794,r9
> >> 
> >> Note that here the access_ok() in user_write_access_begin() is skipped
> >> because the exact same verification has already been performed at the
> >> beginning of the fonction with the call to user_read_access_begin().
> >> 
> >> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
> >> ---
> >> This is a lighter version of previous patch "[PATCH v2] ALSA: pcm: Convert multiple {get/put}_user to user_access_begin/user_access_end()" focussing on identified hot path.
> >> Moved and nested the failure labels closer in order to increase readability
> > 
> > Thanks for the revised patch!
> > 
> > Although it's now much lighter, I still believe that we can reduce
> > get_user() / put_user() calls significantly by adjusting the struct
> > usage.
> > 
> > Could you check whether the patch below can improve?
> > (Zero-ing of sstatus can be an overhead here, but there are some
> > holes, and these need to be initialized before copying back...)
> > 
> 
> Thanks for the proposed patch. Unfortunately it doesn't improve the
> situation. The problem with copy_from_user() and copy_to_user() is
> that they perform quite bad on small regions. And for the from_user
> side we still get two user access enable/disable instead 3 and for the
> to_user side we still get two as well (Allthough we had 7
> previously). Those 4 user access enable/disable still have a cost.
> 
> Nowadays the tendency is really to go for the unsafe_put/get_user
> style, see some significant exemples below. And as mentioned in those
> commits, behind the performance improvement it also lead to much
> cleaner code generation.
> - 38ebcf5096a8 ("scm: optimize put_cmsg()")
> - 9f79b78ef744 ("Convert filldir[64]() from __put_user() to
> unsafe_put_user()")
> - ef0ba0553829 ("poll: fix performance regression due to out-of-line
> __put_user()")
> 
> Commit 38ebcf5096a8 is explicit about copy_to_user() being bad for
> small regions.
> 
> Here below is some comparison between the three way of doing
> snd_pcm_ioctl_sync_ptr_compat(), output is from 'perf top':
> 
> Initially I got the following. That 12%+ is the reason why I started
> investigating.
> 
>     14.20%  test_perf           [.] engine_main
> ==> 12.86%  [kernel]            [k] snd_pcm_ioctl
>     11.91%  [kernel]            [k] finish_task_switch.isra.0
>      4.15%  [kernel]            [k] snd_pcm_group_unlock_irq.part.0
>      4.07%  libc.so.6           [.] __ioctl_time64
>      3.58%  libasound.so.2.0.0  [.] __snd_pcm_mmap_begin_generic
>      3.37%  [kernel]            [k] sys_ioctl
>      2.96%  libasound.so.2.0.0  [.] snd_pcm_hw_avail_update
>      2.73%  libasound.so.2.0.0  [.] __snd_pcm_mmap_begin
>      2.58%  [kernel]            [k] system_call_exception
>      1.93%  libasound.so.2.0.0  [.] sync_ptr1
>      1.85%  libasound.so.2.0.0  [.] snd_pcm_unlock
>      1.84%  libasound.so.2.0.0  [.] snd_pcm_mmap_begin
>      1.83%  libasound.so.2.0.0  [.] bad_pcm_state
>      1.68%  libasound.so.2.0.0  [.] snd_pcm_mmap_avail
>      1.67%  libasound.so.2.0.0  [.] snd_pcm_avail_update
> 
> With _your_ patch I get the following. copy_from_user() calls
> _copy_from_user() and copy_to_user() calls _copy_to_user(). Both then
> call __copy_tofrom_user(). In total it is 16.4% so it is worse than
> before.
> 
>     14.47%  test_perf           [.] engine_main
>     12.00%  [kernel]            [k] finish_task_switch.isra.0
> ==>  8.37%  [kernel]            [k] snd_pcm_ioctl
>      5.44%  libc.so.6           [.] __ioctl_time64
>      5.03%  [kernel]            [k] snd_pcm_group_unlock_irq.part.0
> ==>  4.86%  [kernel]            [k] __copy_tofrom_user
>      4.62%  [kernel]            [k] sys_ioctl
>      3.22%  [kernel]            [k] system_call_exception
>      2.42%  libasound.so.2.0.0  [.] snd_pcm_mmap_begin
>      2.31%  [kernel]            [k] fdget
>      2.23%  libasound.so.2.0.0  [.] __snd_pcm_mmap_begin_generic
>      2.19%  [kernel]            [k] syscall_exit_prepare
>      1.92%  libasound.so.2.0.0  [.] snd_pcm_mmap_avail
>      1.86%  libasound.so.2.0.0  [.] __snd_pcm_mmap_begin
>      1.68%  libasound.so.2.0.0  [.] snd_pcm_hw_avail_update
> ==>  1.67%  [kernel]            [k] _copy_from_user
>      1.66%  libasound.so.2.0.0  [.] bad_pcm_state
> ==>  1.53%  [kernel]            [k] _copy_to_user
>      1.40%  libasound.so.2.0.0  [.] sync_ptr1
> 
> With my patch I get the following:
> 
>     17.46%  test_perf           [.] engine_main
>      9.14%  [kernel]            [k] finish_task_switch.isra.0
> ==>  4.92%  [kernel]            [k] snd_pcm_ioctl
>      3.99%  [kernel]            [k] snd_pcm_group_unlock_irq.part.0
>      3.71%  libc.so.6           [.] __ioctl_time64
>      3.61%  libasound.so.2.0.0  [.] __snd_pcm_mmap_begin_generic
>      2.72%  libasound.so.2.0.0  [.] sync_ptr1
>      2.65%  [kernel]            [k] system_call_exception
>      2.46%  [kernel]            [k] sys_ioctl
>      2.43%  [kernel]            [k] __rseq_handle_notify_resume
>      2.34%  [kernel]            [k] do_epoll_wait
>      2.30%  libasound.so.2.0.0  [.] __snd_pcm_mmap_commit
>      2.14%  libasound.so.2.0.0  [.] __snd_pcm_avail
>      2.04%  libasound.so.2.0.0  [.] snd_pcm_hw_avail_update
>      1.89%  libasound.so.2.0.0  [.] snd_pcm_lock
>      1.84%  libasound.so.2.0.0  [.] snd_pcm_mmap_avail
>      1.76%  libasound.so.2.0.0  [.] __snd_pcm_avail_update
>      1.61%  libasound.so.2.0.0  [.] bad_pcm_state
>      1.60%  libasound.so.2.0.0  [.] __snd_pcm_mmap_begin
>      1.49%  libasound.so.2.0.0  [.] query_status_data

Thanks for the detailed analysis!  Then unsafe_*_user() seems to be
the way to go.  I'll check your latest patches.


Takashi