[PATCH 2/3] system/memory: Extract 'qemu/memory_ldst_unaligned.h' header

Philippe Mathieu-Daudé posted 3 patches 1 week, 2 days ago
[PATCH 2/3] system/memory: Extract 'qemu/memory_ldst_unaligned.h' header
Posted by Philippe Mathieu-Daudé 1 week, 2 days ago
Unaligned memcpy API is buried within 'qemu/bswap.h',
supposed to be related to endianness swapping. Extract
to a new header to clarify.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
---
 include/qemu/bswap.h                 | 62 +------------------------
 include/qemu/memory_ldst_unaligned.h | 67 ++++++++++++++++++++++++++++
 accel/tcg/translator.c               |  1 +
 hw/display/ati_2d.c                  |  1 +
 hw/display/sm501.c                   |  2 +-
 hw/remote/vfio-user-obj.c            |  1 +
 hw/vmapple/virtio-blk.c              |  1 +
 net/checksum.c                       |  1 +
 ui/vnc-enc-tight.c                   |  1 +
 util/bufferiszero.c                  |  2 +-
 10 files changed, 76 insertions(+), 63 deletions(-)
 create mode 100644 include/qemu/memory_ldst_unaligned.h

diff --git a/include/qemu/bswap.h b/include/qemu/bswap.h
index b77ea955de5..e8b944988c3 100644
--- a/include/qemu/bswap.h
+++ b/include/qemu/bswap.h
@@ -1,6 +1,7 @@
 #ifndef BSWAP_H
 #define BSWAP_H
 
+#include "qemu/memory_ldst_unaligned.h"
 #include "qemu/target-info.h"
 
 #undef  bswap16
@@ -173,8 +174,6 @@ CPU_CONVERT(le, 64, uint64_t)
 # define const_le16(_x) (_x)
 #endif
 
-/* unaligned/endian-independent pointer access */
-
 /*
  * the generic syntax is:
  *
@@ -201,7 +200,6 @@ CPU_CONVERT(le, 64, uint64_t)
  *   q: 64 bits
  *
  * endian is:
- *   he   : host endian
  *   be   : big endian
  *   le   : little endian
  *   te   : target endian
@@ -237,64 +235,6 @@ static inline void stb_p(void *ptr, uint8_t v)
     *(uint8_t *)ptr = v;
 }
 
-/*
- * Any compiler worth its salt will turn these memcpy into native unaligned
- * operations.  Thus we don't need to play games with packed attributes, or
- * inline byte-by-byte stores.
- * Some compilation environments (eg some fortify-source implementations)
- * may intercept memcpy() in a way that defeats the compiler optimization,
- * though, so we use __builtin_memcpy() to give ourselves the best chance
- * of good performance.
- */
-
-static inline int lduw_he_p(const void *ptr)
-{
-    uint16_t r;
-    __builtin_memcpy(&r, ptr, sizeof(r));
-    return r;
-}
-
-static inline int ldsw_he_p(const void *ptr)
-{
-    int16_t r;
-    __builtin_memcpy(&r, ptr, sizeof(r));
-    return r;
-}
-
-static inline void stw_he_p(void *ptr, uint16_t v)
-{
-    __builtin_memcpy(ptr, &v, sizeof(v));
-}
-
-static inline void st24_he_p(void *ptr, uint32_t v)
-{
-    __builtin_memcpy(ptr, &v, 3);
-}
-
-static inline int ldl_he_p(const void *ptr)
-{
-    int32_t r;
-    __builtin_memcpy(&r, ptr, sizeof(r));
-    return r;
-}
-
-static inline void stl_he_p(void *ptr, uint32_t v)
-{
-    __builtin_memcpy(ptr, &v, sizeof(v));
-}
-
-static inline uint64_t ldq_he_p(const void *ptr)
-{
-    uint64_t r;
-    __builtin_memcpy(&r, ptr, sizeof(r));
-    return r;
-}
-
-static inline void stq_he_p(void *ptr, uint64_t v)
-{
-    __builtin_memcpy(ptr, &v, sizeof(v));
-}
-
 static inline int lduw_le_p(const void *ptr)
 {
     return (uint16_t)le_bswap(lduw_he_p(ptr), 16);
diff --git a/include/qemu/memory_ldst_unaligned.h b/include/qemu/memory_ldst_unaligned.h
new file mode 100644
index 00000000000..f6b64e8fe9c
--- /dev/null
+++ b/include/qemu/memory_ldst_unaligned.h
@@ -0,0 +1,67 @@
+/*
+ * QEMU unaligned/endian-independent pointer access
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+*/
+#ifndef QEMU_MEMORY_LDST_UNALIGNED_H
+#define QEMU_MEMORY_LDST_UNALIGNED_H
+
+/*
+ * Any compiler worth its salt will turn these memcpy into native unaligned
+ * operations.  Thus we don't need to play games with packed attributes, or
+ * inline byte-by-byte stores.
+ * Some compilation environments (eg some fortify-source implementations)
+ * may intercept memcpy() in a way that defeats the compiler optimization,
+ * though, so we use __builtin_memcpy() to give ourselves the best chance
+ * of good performance.
+ */
+
+static inline int lduw_he_p(const void *ptr)
+{
+    uint16_t r;
+    __builtin_memcpy(&r, ptr, sizeof(r));
+    return r;
+}
+
+static inline int ldsw_he_p(const void *ptr)
+{
+    int16_t r;
+    __builtin_memcpy(&r, ptr, sizeof(r));
+    return r;
+}
+
+static inline void stw_he_p(void *ptr, uint16_t v)
+{
+    __builtin_memcpy(ptr, &v, sizeof(v));
+}
+
+static inline void st24_he_p(void *ptr, uint32_t v)
+{
+    __builtin_memcpy(ptr, &v, 3);
+}
+
+static inline int ldl_he_p(const void *ptr)
+{
+    int32_t r;
+    __builtin_memcpy(&r, ptr, sizeof(r));
+    return r;
+}
+
+static inline void stl_he_p(void *ptr, uint32_t v)
+{
+    __builtin_memcpy(ptr, &v, sizeof(v));
+}
+
+static inline uint64_t ldq_he_p(const void *ptr)
+{
+    uint64_t r;
+    __builtin_memcpy(&r, ptr, sizeof(r));
+    return r;
+}
+
+static inline void stq_he_p(void *ptr, uint64_t v)
+{
+    __builtin_memcpy(ptr, &v, sizeof(v));
+}
+
+#endif
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index 034f2f359ef..86cdd70c47f 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -9,6 +9,7 @@
 
 #include "qemu/osdep.h"
 #include "qemu/bswap.h"
+#include "qemu/memory_ldst_unaligned.h"
 #include "qemu/log.h"
 #include "qemu/error-report.h"
 #include "accel/tcg/cpu-ldst-common.h"
diff --git a/hw/display/ati_2d.c b/hw/display/ati_2d.c
index 309bb5ccb6c..24a3c3e942f 100644
--- a/hw/display/ati_2d.c
+++ b/hw/display/ati_2d.c
@@ -10,6 +10,7 @@
 #include "qemu/osdep.h"
 #include "ati_int.h"
 #include "ati_regs.h"
+#include "qemu/memory_ldst_unaligned.h"
 #include "qemu/log.h"
 #include "ui/pixel_ops.h"
 #include "ui/console.h"
diff --git a/hw/display/sm501.c b/hw/display/sm501.c
index bc091b3c9fb..51efe2ad41f 100644
--- a/hw/display/sm501.c
+++ b/hw/display/sm501.c
@@ -28,6 +28,7 @@
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "qemu/log.h"
+#include "qemu/memory_ldst_unaligned.h"
 #include "qemu/module.h"
 #include "hw/usb/hcd-ohci.h"
 #include "hw/char/serial-mm.h"
@@ -40,7 +41,6 @@
 #include "hw/display/i2c-ddc.h"
 #include "qemu/range.h"
 #include "ui/pixel_ops.h"
-#include "qemu/bswap.h"
 #include "trace.h"
 #include "qom/object.h"
 
diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
index 216b4876e24..9c9ac8b0d92 100644
--- a/hw/remote/vfio-user-obj.c
+++ b/hw/remote/vfio-user-obj.c
@@ -49,6 +49,7 @@
 #include "qapi/error.h"
 #include "qapi/qapi-visit-sockets.h"
 #include "qapi/qapi-events-misc.h"
+#include "qemu/memory_ldst_unaligned.h"
 #include "qemu/notify.h"
 #include "qemu/thread.h"
 #include "qemu/main-loop.h"
diff --git a/hw/vmapple/virtio-blk.c b/hw/vmapple/virtio-blk.c
index 9de9aaae0bf..f5e8e92df75 100644
--- a/hw/vmapple/virtio-blk.c
+++ b/hw/vmapple/virtio-blk.c
@@ -19,6 +19,7 @@
 #include "hw/vmapple/vmapple.h"
 #include "hw/virtio/virtio-blk.h"
 #include "hw/virtio/virtio-pci.h"
+#include "qemu/memory_ldst_unaligned.h"
 #include "qemu/log.h"
 #include "qemu/module.h"
 #include "qapi/error.h"
diff --git a/net/checksum.c b/net/checksum.c
index 537457d89d0..18be31c26e5 100644
--- a/net/checksum.c
+++ b/net/checksum.c
@@ -16,6 +16,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/memory_ldst_unaligned.h"
 #include "net/checksum.h"
 #include "net/eth.h"
 
diff --git a/ui/vnc-enc-tight.c b/ui/vnc-enc-tight.c
index 9dfe6ae5a24..b645aebccef 100644
--- a/ui/vnc-enc-tight.c
+++ b/ui/vnc-enc-tight.c
@@ -42,6 +42,7 @@
 #include <jpeglib.h>
 #endif
 
+#include "qemu/memory_ldst_unaligned.h"
 #include "qemu/bswap.h"
 #include "vnc.h"
 #include "vnc-enc-tight.h"
diff --git a/util/bufferiszero.c b/util/bufferiszero.c
index 522146dab97..9548dd3ad1b 100644
--- a/util/bufferiszero.c
+++ b/util/bufferiszero.c
@@ -23,7 +23,7 @@
  */
 #include "qemu/osdep.h"
 #include "qemu/cutils.h"
-#include "qemu/bswap.h"
+#include "qemu/memory_ldst_unaligned.h"
 #include "host/cpuinfo.h"
 
 typedef bool (*biz_accel_fn)(const void *, size_t);
-- 
2.52.0


Re: [PATCH 2/3] system/memory: Extract 'qemu/memory_ldst_unaligned.h' header
Posted by BALATON Zoltan 1 week, 2 days ago
On Sun, 28 Dec 2025, Philippe Mathieu-Daudé wrote:
> Unaligned memcpy API is buried within 'qemu/bswap.h',
> supposed to be related to endianness swapping. Extract
> to a new header to clarify.
>
> Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
> ---
> include/qemu/bswap.h                 | 62 +------------------------
> include/qemu/memory_ldst_unaligned.h | 67 ++++++++++++++++++++++++++++
> accel/tcg/translator.c               |  1 +
> hw/display/ati_2d.c                  |  1 +
> hw/display/sm501.c                   |  2 +-
> hw/remote/vfio-user-obj.c            |  1 +
> hw/vmapple/virtio-blk.c              |  1 +
> net/checksum.c                       |  1 +
> ui/vnc-enc-tight.c                   |  1 +
> util/bufferiszero.c                  |  2 +-
> 10 files changed, 76 insertions(+), 63 deletions(-)
> create mode 100644 include/qemu/memory_ldst_unaligned.h
>
> diff --git a/include/qemu/bswap.h b/include/qemu/bswap.h
> index b77ea955de5..e8b944988c3 100644
> --- a/include/qemu/bswap.h
> +++ b/include/qemu/bswap.h
> @@ -1,6 +1,7 @@
> #ifndef BSWAP_H
> #define BSWAP_H
>
> +#include "qemu/memory_ldst_unaligned.h"

If it's included here do users need to also include it separately or if so 
should some of those users lose bswap.h include now instead of including 
both this header and bswap.h? I think it's simpler to only include it here 
and let users get it through bswap.h unless you review and remove now 
unnecessary bswap.h includes from places that only need this hearder but 
I don't know if that's worth the effort.

Regards,
BALATON Zoltan
Re: [PATCH 2/3] system/memory: Extract 'qemu/memory_ldst_unaligned.h' header
Posted by Philippe Mathieu-Daudé 1 week, 2 days ago
Hi Zoltan,

On 28/12/25 17:46, BALATON Zoltan wrote:
> On Sun, 28 Dec 2025, Philippe Mathieu-Daudé wrote:
>> Unaligned memcpy API is buried within 'qemu/bswap.h',
>> supposed to be related to endianness swapping. Extract
>> to a new header to clarify.
>>
>> Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
>> Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
>> ---
>> include/qemu/bswap.h                 | 62 +------------------------
>> include/qemu/memory_ldst_unaligned.h | 67 ++++++++++++++++++++++++++++
>> accel/tcg/translator.c               |  1 +
>> hw/display/ati_2d.c                  |  1 +
>> hw/display/sm501.c                   |  2 +-
>> hw/remote/vfio-user-obj.c            |  1 +
>> hw/vmapple/virtio-blk.c              |  1 +
>> net/checksum.c                       |  1 +
>> ui/vnc-enc-tight.c                   |  1 +
>> util/bufferiszero.c                  |  2 +-
>> 10 files changed, 76 insertions(+), 63 deletions(-)
>> create mode 100644 include/qemu/memory_ldst_unaligned.h
>>
>> diff --git a/include/qemu/bswap.h b/include/qemu/bswap.h
>> index b77ea955de5..e8b944988c3 100644
>> --- a/include/qemu/bswap.h
>> +++ b/include/qemu/bswap.h
>> @@ -1,6 +1,7 @@
>> #ifndef BSWAP_H
>> #define BSWAP_H
>>
>> +#include "qemu/memory_ldst_unaligned.h"
> 
> If it's included here do users need to also include it separately or if 
> so should some of those users lose bswap.h include now instead of 
> including both this header and bswap.h? I think it's simpler to only 
> include it here and let users get it through bswap.h unless you review 
> and remove now unnecessary bswap.h includes from places that only need 
> this hearder but I don't know if that's worth the effort.

bswap API users have to include <qemu/bswap.h>,
users of ld/st_unaligned() one have to include 
<qemu/memory_ldst_unaligned.h>.

This is a one to one match, what is the problem?


Re: [PATCH 2/3] system/memory: Extract 'qemu/memory_ldst_unaligned.h' header
Posted by Philippe Mathieu-Daudé 1 week ago
On 28/12/25 21:04, Philippe Mathieu-Daudé wrote:
> Hi Zoltan,
> 
> On 28/12/25 17:46, BALATON Zoltan wrote:
>> On Sun, 28 Dec 2025, Philippe Mathieu-Daudé wrote:
>>> Unaligned memcpy API is buried within 'qemu/bswap.h',
>>> supposed to be related to endianness swapping. Extract
>>> to a new header to clarify.
>>>
>>> Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
>>> Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
>>> ---
>>> include/qemu/bswap.h                 | 62 +------------------------
>>> include/qemu/memory_ldst_unaligned.h | 67 ++++++++++++++++++++++++++++
>>> accel/tcg/translator.c               |  1 +
>>> hw/display/ati_2d.c                  |  1 +
>>> hw/display/sm501.c                   |  2 +-
>>> hw/remote/vfio-user-obj.c            |  1 +
>>> hw/vmapple/virtio-blk.c              |  1 +
>>> net/checksum.c                       |  1 +
>>> ui/vnc-enc-tight.c                   |  1 +
>>> util/bufferiszero.c                  |  2 +-
>>> 10 files changed, 76 insertions(+), 63 deletions(-)
>>> create mode 100644 include/qemu/memory_ldst_unaligned.h
>>>
>>> diff --git a/include/qemu/bswap.h b/include/qemu/bswap.h
>>> index b77ea955de5..e8b944988c3 100644
>>> --- a/include/qemu/bswap.h
>>> +++ b/include/qemu/bswap.h
>>> @@ -1,6 +1,7 @@
>>> #ifndef BSWAP_H
>>> #define BSWAP_H
>>>
>>> +#include "qemu/memory_ldst_unaligned.h"
>>
>> If it's included here do users need to also include it separately or 
>> if so should some of those users lose bswap.h include now instead of 
>> including both this header and bswap.h? I think it's simpler to only 
>> include it here and let users get it through bswap.h unless you review 
>> and remove now unnecessary bswap.h includes from places that only need 
>> this hearder but I don't know if that's worth the effort.
> 
> bswap API users have to include <qemu/bswap.h>,
> users of ld/st_unaligned() one have to include <qemu/ 
> memory_ldst_unaligned.h>.

Let me try again.

Users of the ld/st_unaligned() API have to include
<qemu/ldst_unaligned.h>; if they don't use <qemu/bswap.h>
then first it is pointless to include it, but furthermore
it saves few compilation cycles for the unused inlined
bswap functions.

Users of the bswap() API have to include <qemu/bswap.h>
(which indirectly includes <qemu/ldst_unaligned.h>, but
this is irrelevant).

Users of both the bswap() and ld/st_unaligned() APIs have
to include both <qemu/bswap.h> AND <qemu/ldst_unaligned.h>,
even if it is indirectly included. That makes the use of
APIs more explicit in the source file, and furthermore it
avoids code churn when indirect headers are reworked.

For recent examples see commits 34bcd8f4ff9 ("hw/int/loongarch:
Include missing 'system/memory.h' header") or fdda97b47e6
("hw/arm/virt-acpi-build: Include missing 'cpu.h' header").

Is it clearer?

Regards,

Phil.

Re: [PATCH 2/3] system/memory: Extract 'qemu/memory_ldst_unaligned.h' header
Posted by BALATON Zoltan 1 week ago
On Tue, 30 Dec 2025, Philippe Mathieu-Daudé wrote:
> On 28/12/25 21:04, Philippe Mathieu-Daudé wrote:
>> Hi Zoltan,
>> 
>> On 28/12/25 17:46, BALATON Zoltan wrote:
>>> On Sun, 28 Dec 2025, Philippe Mathieu-Daudé wrote:
>>>> Unaligned memcpy API is buried within 'qemu/bswap.h',
>>>> supposed to be related to endianness swapping. Extract
>>>> to a new header to clarify.
>>>> 
>>>> Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
>>>> Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
>>>> ---
>>>> include/qemu/bswap.h                 | 62 +------------------------
>>>> include/qemu/memory_ldst_unaligned.h | 67 ++++++++++++++++++++++++++++
>>>> accel/tcg/translator.c               |  1 +
>>>> hw/display/ati_2d.c                  |  1 +
>>>> hw/display/sm501.c                   |  2 +-
>>>> hw/remote/vfio-user-obj.c            |  1 +
>>>> hw/vmapple/virtio-blk.c              |  1 +
>>>> net/checksum.c                       |  1 +
>>>> ui/vnc-enc-tight.c                   |  1 +
>>>> util/bufferiszero.c                  |  2 +-
>>>> 10 files changed, 76 insertions(+), 63 deletions(-)
>>>> create mode 100644 include/qemu/memory_ldst_unaligned.h
>>>> 
>>>> diff --git a/include/qemu/bswap.h b/include/qemu/bswap.h
>>>> index b77ea955de5..e8b944988c3 100644
>>>> --- a/include/qemu/bswap.h
>>>> +++ b/include/qemu/bswap.h
>>>> @@ -1,6 +1,7 @@
>>>> #ifndef BSWAP_H
>>>> #define BSWAP_H
>>>> 
>>>> +#include "qemu/memory_ldst_unaligned.h"
>>> 
>>> If it's included here do users need to also include it separately or if so 
>>> should some of those users lose bswap.h include now instead of including 
>>> both this header and bswap.h? I think it's simpler to only include it here 
>>> and let users get it through bswap.h unless you review and remove now 
>>> unnecessary bswap.h includes from places that only need this hearder but I 
>>> don't know if that's worth the effort.
>> 
>> bswap API users have to include <qemu/bswap.h>,
>> users of ld/st_unaligned() one have to include <qemu/ 
>> memory_ldst_unaligned.h>.
>
> Let me try again.
>
> Users of the ld/st_unaligned() API have to include
> <qemu/ldst_unaligned.h>; if they don't use <qemu/bswap.h>
> then first it is pointless to include it, but furthermore
> it saves few compilation cycles for the unused inlined
> bswap functions.
>
> Users of the bswap() API have to include <qemu/bswap.h>
> (which indirectly includes <qemu/ldst_unaligned.h>, but
> this is irrelevant).
>
> Users of both the bswap() and ld/st_unaligned() APIs have
> to include both <qemu/bswap.h> AND <qemu/ldst_unaligned.h>,
> even if it is indirectly included. That makes the use of
> APIs more explicit in the source file, and furthermore it
> avoids code churn when indirect headers are reworked.

Your patch added #include of the new header to 8 files so this is already 
causing churn in the hope of avoiding it.

> For recent examples see commits 34bcd8f4ff9 ("hw/int/loongarch:
> Include missing 'system/memory.h' header") or fdda97b47e6
> ("hw/arm/virt-acpi-build: Include missing 'cpu.h' header").
>
> Is it clearer?

I did not notice that you removed bswap.h from 2 files and thought the 
patch only added the new header. In that case my comment does not apply 
but I still see this more as a churn than useful cleanup. OK, host endian 
does not swap but if you consider that the le and be functions in bswap.h 
are based on the he functions then you can just leave these there. If it 
bothers you add a comment, that's still less churn.

Regards,
BALATON Zoltan