[PATCH v4 5/9] target/arm: Make some MTE helpers widely available

Gustavo Romero posted 9 patches 5 months ago
There is a newer version of this series
[PATCH v4 5/9] target/arm: Make some MTE helpers widely available
Posted by Gustavo Romero 5 months ago
Make the MTE helpers allocation_tag_mem_probe, load_tag1, and store_tag1
available to other subsystems.

Signed-off-by: Gustavo Romero <gustavo.romero@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/tcg/mte_helper.c | 54 +++------------------------
 target/arm/tcg/mte_helper.h | 74 +++++++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+), 49 deletions(-)
 create mode 100644 target/arm/tcg/mte_helper.h

diff --git a/target/arm/tcg/mte_helper.c b/target/arm/tcg/mte_helper.c
index a50d576294..da6bc72b9c 100644
--- a/target/arm/tcg/mte_helper.c
+++ b/target/arm/tcg/mte_helper.c
@@ -29,6 +29,7 @@
 #include "hw/core/tcg-cpu-ops.h"
 #include "qapi/error.h"
 #include "qemu/guest-random.h"
+#include "mte_helper.h"
 
 
 static int choose_nonexcluded_tag(int tag, int offset, uint16_t exclude)
@@ -50,42 +51,10 @@ static int choose_nonexcluded_tag(int tag, int offset, uint16_t exclude)
     return tag;
 }
 
-/**
- * allocation_tag_mem_probe:
- * @env: the cpu environment
- * @ptr_mmu_idx: the addressing regime to use for the virtual address
- * @ptr: the virtual address for which to look up tag memory
- * @ptr_access: the access to use for the virtual address
- * @ptr_size: the number of bytes in the normal memory access
- * @tag_access: the access to use for the tag memory
- * @probe: true to merely probe, never taking an exception
- * @ra: the return address for exception handling
- *
- * Our tag memory is formatted as a sequence of little-endian nibbles.
- * That is, the byte at (addr >> (LOG2_TAG_GRANULE + 1)) contains two
- * tags, with the tag at [3:0] for the lower addr and the tag at [7:4]
- * for the higher addr.
- *
- * Here, resolve the physical address from the virtual address, and return
- * a pointer to the corresponding tag byte.
- *
- * If there is no tag storage corresponding to @ptr, return NULL.
- *
- * If the page is inaccessible for @ptr_access, or has a watchpoint, there are
- * three options:
- * (1) probe = true, ra = 0 : pure probe -- we return NULL if the page is not
- *     accessible, and do not take watchpoint traps. The calling code must
- *     handle those cases in the right priority compared to MTE traps.
- * (2) probe = false, ra = 0 : probe, no fault expected -- the caller guarantees
- *     that the page is going to be accessible. We will take watchpoint traps.
- * (3) probe = false, ra != 0 : non-probe -- we will take both memory access
- *     traps and watchpoint traps.
- * (probe = true, ra != 0 is invalid and will assert.)
- */
-static uint8_t *allocation_tag_mem_probe(CPUARMState *env, int ptr_mmu_idx,
-                                         uint64_t ptr, MMUAccessType ptr_access,
-                                         int ptr_size, MMUAccessType tag_access,
-                                         bool probe, uintptr_t ra)
+uint8_t *allocation_tag_mem_probe(CPUARMState *env, int ptr_mmu_idx,
+                                  uint64_t ptr, MMUAccessType ptr_access,
+                                  int ptr_size, MMUAccessType tag_access,
+                                  bool probe, uintptr_t ra)
 {
 #ifdef CONFIG_USER_ONLY
     uint64_t clean_ptr = useronly_clean_ptr(ptr);
@@ -287,12 +256,6 @@ uint64_t HELPER(addsubg)(CPUARMState *env, uint64_t ptr,
     return address_with_allocation_tag(ptr + offset, rtag);
 }
 
-static int load_tag1(uint64_t ptr, uint8_t *mem)
-{
-    int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4;
-    return extract32(*mem, ofs, 4);
-}
-
 uint64_t HELPER(ldg)(CPUARMState *env, uint64_t ptr, uint64_t xt)
 {
     int mmu_idx = arm_env_mmu_index(env);
@@ -320,13 +283,6 @@ static void check_tag_aligned(CPUARMState *env, uint64_t ptr, uintptr_t ra)
     }
 }
 
-/* For use in a non-parallel context, store to the given nibble.  */
-static void store_tag1(uint64_t ptr, uint8_t *mem, int tag)
-{
-    int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4;
-    *mem = deposit32(*mem, ofs, 4, tag);
-}
-
 /* For use in a parallel context, atomically store to the given nibble.  */
 static void store_tag1_parallel(uint64_t ptr, uint8_t *mem, int tag)
 {
diff --git a/target/arm/tcg/mte_helper.h b/target/arm/tcg/mte_helper.h
new file mode 100644
index 0000000000..6a82ff3403
--- /dev/null
+++ b/target/arm/tcg/mte_helper.h
@@ -0,0 +1,74 @@
+/*
+ * ARM MemTag operation helpers.
+ *
+ * This code is licensed under the GNU GPL v2 or later.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+#ifndef TARGET_ARM_MTE_H
+#define TARGET_ARM_MTE_H
+
+/**
+ * allocation_tag_mem_probe:
+ * @env: the cpu environment
+ * @ptr_mmu_idx: the addressing regime to use for the virtual address
+ * @ptr: the virtual address for which to look up tag memory
+ * @ptr_access: the access to use for the virtual address
+ * @ptr_size: the number of bytes in the normal memory access
+ * @tag_access: the access to use for the tag memory
+ * @probe: true to merely probe, never taking an exception
+ * @ra: the return address for exception handling
+ *
+ * Our tag memory is formatted as a sequence of little-endian nibbles.
+ * That is, the byte at (addr >> (LOG2_TAG_GRANULE + 1)) contains two
+ * tags, with the tag at [3:0] for the lower addr and the tag at [7:4]
+ * for the higher addr.
+ *
+ * Here, resolve the physical address from the virtual address, and return
+ * a pointer to the corresponding tag byte.
+ *
+ * If there is no tag storage corresponding to @ptr, return NULL.
+ *
+ * If the page is inaccessible for @ptr_access, or has a watchpoint, there are
+ * three options:
+ * (1) probe = true, ra = 0 : pure probe -- we return NULL if the page is not
+ *     accessible, and do not take watchpoint traps. The calling code must
+ *     handle those cases in the right priority compared to MTE traps.
+ * (2) probe = false, ra = 0 : probe, no fault expected -- the caller guarantees
+ *     that the page is going to be accessible. We will take watchpoint traps.
+ * (3) probe = false, ra != 0 : non-probe -- we will take both memory access
+ *     traps and watchpoint traps.
+ * (probe = true, ra != 0 is invalid and will assert.)
+ */
+uint8_t *allocation_tag_mem_probe(CPUARMState *env, int ptr_mmu_idx,
+                                  uint64_t ptr, MMUAccessType ptr_access,
+                                  int ptr_size, MMUAccessType tag_access,
+                                  bool probe, uintptr_t ra);
+
+/**
+ * load_tag1 - Load 1 tag (nibble) from byte
+ * @ptr: The tagged address
+ * @mem: The tag address (packed, 2 tags in byte)
+ */
+static inline int load_tag1(uint64_t ptr, uint8_t *mem)
+{
+    int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4;
+    return extract32(*mem, ofs, 4);
+}
+
+/**
+ * store_tag1 - Store 1 tag (nibble) into byte
+ * @ptr: The tagged address
+ * @mem: The tag address (packed, 2 tags in byte)
+ * @tag: The tag to be stored in the nibble
+ *
+ * For use in a non-parallel context, store to the given nibble.
+ */
+static inline void store_tag1(uint64_t ptr, uint8_t *mem, int tag)
+{
+    int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4;
+    *mem = deposit32(*mem, ofs, 4, tag);
+}
+
+#endif /* TARGET_ARM_MTE_H */
-- 
2.34.1
Re: [PATCH v4 5/9] target/arm: Make some MTE helpers widely available
Posted by Philippe Mathieu-Daudé 5 months ago
Hi Gustavo,

On 24/6/24 07:30, Gustavo Romero wrote:
> Make the MTE helpers allocation_tag_mem_probe, load_tag1, and store_tag1
> available to other subsystems.

Again, you can make them available externally by removing the
static scope. I'm not keen anymore on inline function definitions,
please justify why you need them. Inline functions often require
more headers to be pulled in, and beyond the preprocessing overhead,
it makes header maintenance more painful.

> Signed-off-by: Gustavo Romero <gustavo.romero@linaro.org>
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   target/arm/tcg/mte_helper.c | 54 +++------------------------
>   target/arm/tcg/mte_helper.h | 74 +++++++++++++++++++++++++++++++++++++
>   2 files changed, 79 insertions(+), 49 deletions(-)
>   create mode 100644 target/arm/tcg/mte_helper.h


> diff --git a/target/arm/tcg/mte_helper.h b/target/arm/tcg/mte_helper.h
> new file mode 100644
> index 0000000000..6a82ff3403
> --- /dev/null
> +++ b/target/arm/tcg/mte_helper.h
> @@ -0,0 +1,74 @@
> +/*
> + * ARM MemTag operation helpers.
> + *
> + * This code is licensed under the GNU GPL v2 or later.
> + *
> + * SPDX-License-Identifier: LGPL-2.1-or-later
> + */
> +
> +#ifndef TARGET_ARM_MTE_H
> +#define TARGET_ARM_MTE_H
> +
> +/**
> + * allocation_tag_mem_probe:
> + * @env: the cpu environment
> + * @ptr_mmu_idx: the addressing regime to use for the virtual address
> + * @ptr: the virtual address for which to look up tag memory
> + * @ptr_access: the access to use for the virtual address
> + * @ptr_size: the number of bytes in the normal memory access
> + * @tag_access: the access to use for the tag memory
> + * @probe: true to merely probe, never taking an exception
> + * @ra: the return address for exception handling
> + *
> + * Our tag memory is formatted as a sequence of little-endian nibbles.
> + * That is, the byte at (addr >> (LOG2_TAG_GRANULE + 1)) contains two
> + * tags, with the tag at [3:0] for the lower addr and the tag at [7:4]
> + * for the higher addr.
> + *
> + * Here, resolve the physical address from the virtual address, and return
> + * a pointer to the corresponding tag byte.
> + *
> + * If there is no tag storage corresponding to @ptr, return NULL.
> + *
> + * If the page is inaccessible for @ptr_access, or has a watchpoint, there are
> + * three options:
> + * (1) probe = true, ra = 0 : pure probe -- we return NULL if the page is not
> + *     accessible, and do not take watchpoint traps. The calling code must
> + *     handle those cases in the right priority compared to MTE traps.
> + * (2) probe = false, ra = 0 : probe, no fault expected -- the caller guarantees
> + *     that the page is going to be accessible. We will take watchpoint traps.
> + * (3) probe = false, ra != 0 : non-probe -- we will take both memory access
> + *     traps and watchpoint traps.
> + * (probe = true, ra != 0 is invalid and will assert.)
> + */
> +uint8_t *allocation_tag_mem_probe(CPUARMState *env, int ptr_mmu_idx,
> +                                  uint64_t ptr, MMUAccessType ptr_access,
> +                                  int ptr_size, MMUAccessType tag_access,
> +                                  bool probe, uintptr_t ra);

Missing "exec/mmu-access-type.h" header.

> +
> +/**
> + * load_tag1 - Load 1 tag (nibble) from byte
> + * @ptr: The tagged address
> + * @mem: The tag address (packed, 2 tags in byte)
> + */
> +static inline int load_tag1(uint64_t ptr, uint8_t *mem)
> +{
> +    int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4;
> +    return extract32(*mem, ofs, 4);
> +}
> +
> +/**
> + * store_tag1 - Store 1 tag (nibble) into byte
> + * @ptr: The tagged address
> + * @mem: The tag address (packed, 2 tags in byte)
> + * @tag: The tag to be stored in the nibble
> + *
> + * For use in a non-parallel context, store to the given nibble.
> + */
> +static inline void store_tag1(uint64_t ptr, uint8_t *mem, int tag)
> +{
> +    int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4;
> +    *mem = deposit32(*mem, ofs, 4, tag);

If you want them inlined, then this is also missing the "qemu/bitops.h"
header.

> +}
> +
> +#endif /* TARGET_ARM_MTE_H */
Re: [PATCH v4 5/9] target/arm: Make some MTE helpers widely available
Posted by Gustavo Romero 5 months ago
Hi Phil, Richard

On 6/24/24 4:47 AM, Philippe Mathieu-Daudé wrote:
> Hi Gustavo,
> 
> On 24/6/24 07:30, Gustavo Romero wrote:
>> Make the MTE helpers allocation_tag_mem_probe, load_tag1, and store_tag1
>> available to other subsystems.
> 
> Again, you can make them available externally by removing the
> static scope. I'm not keen anymore on inline function definitions,
> please justify why you need them. Inline functions often require
> more headers to be pulled in, and beyond the preprocessing overhead,
> it makes header maintenance more painful.

@Phil Thanks for the clarifications. I understand your point now. I've
removed all inlined functions in v5 [0] [1].

@Richard You have already reviewed one patch in the series that had two
inlined functions, load_tag1 and store_tag1 [0], but as per Phil's
request above I've changed them to be non-inlined. I kept your R-b though,
so let me know if you're still good with it.


Cheers,
Gustavo

v5:

[0] https://lists.gnu.org/archive/html/qemu-devel/2024-06/msg05089.html
[1] https://lists.gnu.org/archive/html/qemu-devel/2024-06/msg05092.html

Re: [PATCH v4 5/9] target/arm: Make some MTE helpers widely available
Posted by Philippe Mathieu-Daudé 5 months ago
On 24/6/24 09:47, Philippe Mathieu-Daudé wrote:
> Hi Gustavo,
> 
> On 24/6/24 07:30, Gustavo Romero wrote:
>> Make the MTE helpers allocation_tag_mem_probe, load_tag1, and store_tag1
>> available to other subsystems.
> 
> Again, you can make them available externally by removing the
> static scope. I'm not keen anymore on inline function definitions,
> please justify why you need them. Inline functions often require
> more headers to be pulled in, and beyond the preprocessing overhead,
> it makes header maintenance more painful.

If my comment is unclear and you don't have any strong justification
to add inlined declarations, what I am requesting is:

-- >8 --
diff --git a/target/arm/tcg/mte_helper.h b/target/arm/tcg/mte_helper.h
new file mode 100644
index 0000000000..0e7a67cf0f
--- /dev/null
+++ b/target/arm/tcg/mte_helper.h
@@ -0,0 +1,68 @@
+/*
+ * ARM MemTag operation helpers.
+ *
+ * This code is licensed under the GNU GPL v2 or later.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+#ifndef TARGET_ARM_MTE_H
+#define TARGET_ARM_MTE_H
+
+#include "exec/mmu-access-type.h"
+
+/**
+ * allocation_tag_mem_probe:
+ * @env: the cpu environment
+ * @ptr_mmu_idx: the addressing regime to use for the virtual address
+ * @ptr: the virtual address for which to look up tag memory
+ * @ptr_access: the access to use for the virtual address
+ * @ptr_size: the number of bytes in the normal memory access
+ * @tag_access: the access to use for the tag memory
+ * @probe: true to merely probe, never taking an exception
+ * @ra: the return address for exception handling
+ *
+ * Our tag memory is formatted as a sequence of little-endian nibbles.
+ * That is, the byte at (addr >> (LOG2_TAG_GRANULE + 1)) contains two
+ * tags, with the tag at [3:0] for the lower addr and the tag at [7:4]
+ * for the higher addr.
+ *
+ * Here, resolve the physical address from the virtual address, and return
+ * a pointer to the corresponding tag byte.
+ *
+ * If there is no tag storage corresponding to @ptr, return NULL.
+ *
+ * If the page is inaccessible for @ptr_access, or has a watchpoint, there are
+ * three options:
+ * (1) probe = true, ra = 0 : pure probe -- we return NULL if the page is not
+ *     accessible, and do not take watchpoint traps. The calling code must
+ *     handle those cases in the right priority compared to MTE traps.
+ * (2) probe = false, ra = 0 : probe, no fault expected -- the caller guarantees
+ *     that the page is going to be accessible. We will take watchpoint traps.
+ * (3) probe = false, ra != 0 : non-probe -- we will take both memory access
+ *     traps and watchpoint traps.
+ * (probe = true, ra != 0 is invalid and will assert.)
+ */
+uint8_t *allocation_tag_mem_probe(CPUARMState *env, int ptr_mmu_idx,
+                                  uint64_t ptr, MMUAccessType ptr_access,
+                                  int ptr_size, MMUAccessType tag_access,
+                                  bool probe, uintptr_t ra);
+
+/**
+ * load_tag1 - Load 1 tag (nibble) from byte
+ * @ptr: The tagged address
+ * @mem: The tag address (packed, 2 tags in byte)
+ */
+int load_tag1(uint64_t ptr, uint8_t *mem);
+
+/**
+ * store_tag1 - Store 1 tag (nibble) into byte
+ * @ptr: The tagged address
+ * @mem: The tag address (packed, 2 tags in byte)
+ * @tag: The tag to be stored in the nibble
+ *
+ * For use in a non-parallel context, store to the given nibble.
+ */
+void store_tag1(uint64_t ptr, uint8_t *mem, int tag);
+
+#endif /* TARGET_ARM_MTE_H */
diff --git a/target/arm/tcg/mte_helper.c b/target/arm/tcg/mte_helper.c
index a50d576294..25ea24ee53 100644
--- a/target/arm/tcg/mte_helper.c
+++ b/target/arm/tcg/mte_helper.c
@@ -29,6 +29,7 @@
  #include "hw/core/tcg-cpu-ops.h"
  #include "qapi/error.h"
  #include "qemu/guest-random.h"
+#include "mte_helper.h"


  static int choose_nonexcluded_tag(int tag, int offset, uint16_t exclude)
@@ -50,42 +51,10 @@ static int choose_nonexcluded_tag(int tag, int offset, uint16_t exclude)
      return tag;
  }

-/**
- * allocation_tag_mem_probe:
- * @env: the cpu environment
- * @ptr_mmu_idx: the addressing regime to use for the virtual address
- * @ptr: the virtual address for which to look up tag memory
- * @ptr_access: the access to use for the virtual address
- * @ptr_size: the number of bytes in the normal memory access
- * @tag_access: the access to use for the tag memory
- * @probe: true to merely probe, never taking an exception
- * @ra: the return address for exception handling
- *
- * Our tag memory is formatted as a sequence of little-endian nibbles.
- * That is, the byte at (addr >> (LOG2_TAG_GRANULE + 1)) contains two
- * tags, with the tag at [3:0] for the lower addr and the tag at [7:4]
- * for the higher addr.
- *
- * Here, resolve the physical address from the virtual address, and return
- * a pointer to the corresponding tag byte.
- *
- * If there is no tag storage corresponding to @ptr, return NULL.
- *
- * If the page is inaccessible for @ptr_access, or has a watchpoint, there are
- * three options:
- * (1) probe = true, ra = 0 : pure probe -- we return NULL if the page is not
- *     accessible, and do not take watchpoint traps. The calling code must
- *     handle those cases in the right priority compared to MTE traps.
- * (2) probe = false, ra = 0 : probe, no fault expected -- the caller guarantees
- *     that the page is going to be accessible. We will take watchpoint traps.
- * (3) probe = false, ra != 0 : non-probe -- we will take both memory access
- *     traps and watchpoint traps.
- * (probe = true, ra != 0 is invalid and will assert.)
- */
-static uint8_t *allocation_tag_mem_probe(CPUARMState *env, int ptr_mmu_idx,
-                                         uint64_t ptr, MMUAccessType ptr_access,
-                                         int ptr_size, MMUAccessType tag_access,
-                                         bool probe, uintptr_t ra)
+uint8_t *allocation_tag_mem_probe(CPUARMState *env, int ptr_mmu_idx,
+                                  uint64_t ptr, MMUAccessType ptr_access,
+                                  int ptr_size, MMUAccessType tag_access,
+                                  bool probe, uintptr_t ra)
  {
  #ifdef CONFIG_USER_ONLY
      uint64_t clean_ptr = useronly_clean_ptr(ptr);
@@ -287,7 +256,7 @@ uint64_t HELPER(addsubg)(CPUARMState *env, uint64_t ptr,
      return address_with_allocation_tag(ptr + offset, rtag);
  }

-static int load_tag1(uint64_t ptr, uint8_t *mem)
+int load_tag1(uint64_t ptr, uint8_t *mem)
  {
      int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4;
      return extract32(*mem, ofs, 4);
@@ -320,8 +289,7 @@ static void check_tag_aligned(CPUARMState *env, uint64_t ptr, uintptr_t ra)
      }
  }

-/* For use in a non-parallel context, store to the given nibble.  */
-static void store_tag1(uint64_t ptr, uint8_t *mem, int tag)
+void store_tag1(uint64_t ptr, uint8_t *mem, int tag)
  {
      int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4;
      *mem = deposit32(*mem, ofs, 4, tag);
---

Or if you prefer a diff to squash on this commit:

-- >8 --
diff --git a/target/arm/tcg/mte_helper.h b/target/arm/tcg/mte_helper.h
index 6a82ff3403..0e7a67cf0f 100644
--- a/target/arm/tcg/mte_helper.h
+++ b/target/arm/tcg/mte_helper.h
@@ -9,6 +9,8 @@
  #ifndef TARGET_ARM_MTE_H
  #define TARGET_ARM_MTE_H

+#include "exec/mmu-access-type.h"
+
  /**
   * allocation_tag_mem_probe:
   * @env: the cpu environment
@@ -51,11 +53,7 @@ uint8_t *allocation_tag_mem_probe(CPUARMState *env, int ptr_mmu_idx,
   * @ptr: The tagged address
   * @mem: The tag address (packed, 2 tags in byte)
   */
-static inline int load_tag1(uint64_t ptr, uint8_t *mem)
-{
-    int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4;
-    return extract32(*mem, ofs, 4);
-}
+int load_tag1(uint64_t ptr, uint8_t *mem);

  /**
   * store_tag1 - Store 1 tag (nibble) into byte
@@ -65,10 +63,6 @@ static inline int load_tag1(uint64_t ptr, uint8_t *mem)
   *
   * For use in a non-parallel context, store to the given nibble.
   */
-static inline void store_tag1(uint64_t ptr, uint8_t *mem, int tag)
-{
-    int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4;
-    *mem = deposit32(*mem, ofs, 4, tag);
-}
+void store_tag1(uint64_t ptr, uint8_t *mem, int tag);

  #endif /* TARGET_ARM_MTE_H */
diff --git a/target/arm/tcg/mte_helper.c b/target/arm/tcg/mte_helper.c
index da6bc72b9c..25ea24ee53 100644
--- a/target/arm/tcg/mte_helper.c
+++ b/target/arm/tcg/mte_helper.c
@@ -256,6 +256,12 @@ uint64_t HELPER(addsubg)(CPUARMState *env, uint64_t ptr,
      return address_with_allocation_tag(ptr + offset, rtag);
  }

+int load_tag1(uint64_t ptr, uint8_t *mem)
+{
+    int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4;
+    return extract32(*mem, ofs, 4);
+}
+
  uint64_t HELPER(ldg)(CPUARMState *env, uint64_t ptr, uint64_t xt)
  {
      int mmu_idx = arm_env_mmu_index(env);
@@ -283,6 +289,12 @@ static void check_tag_aligned(CPUARMState *env, uint64_t ptr, uintptr_t ra)
      }
  }

+void store_tag1(uint64_t ptr, uint8_t *mem, int tag)
+{
+    int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4;
+    *mem = deposit32(*mem, ofs, 4, tag);
+}
+
  /* For use in a parallel context, atomically store to the given nibble.  */
  static void store_tag1_parallel(uint64_t ptr, uint8_t *mem, int tag)
  {
---

Regards,

Phil.