[PATCH 1/3] scripts: introduce scripts/update-aarch64-sysreg-code.py

Eric Auger posted 3 patches 1 week, 1 day ago
Maintainers: John Snow <jsnow@redhat.com>, Cleber Rosa <crosa@redhat.com>, Peter Maydell <peter.maydell@linaro.org>
[PATCH 1/3] scripts: introduce scripts/update-aarch64-sysreg-code.py
Posted by Eric Auger 1 week, 1 day ago
Introduce a script that takes as input the Registers.json file
delivered in the AARCHMRS Features Model downloadable from the
Arm Developer A-Profile Architecture Exploration Tools page:
https://developer.arm.com/Architectures/A-Profile%20Architecture#Downloads
and outputs the list of ID regs in target/arm/cpu-sysregs.h.inc
under the form of DEF(<name>, <op0>, <op1>, <crn>, <crm>, <op2>).

We only care about IDregs with opcodes satisfying:
op0 = 3, op1 within [0, 3], crn = 0, crm within [0, 7], op2 within [0, 7]

Signed-off-by: Eric Auger <eric.auger@redhat.com>

---

This was tested with https://developer.arm.com/-/cdn-downloads/permalink/Exploration-Tools-OS-Machine-Readable-Data/AARCHMRS_BSD/AARCHMRS_OPENSOURCE_A_profile_FAT-2025-09_ASL0.tar.gz

Discussion about undesired generated regs can be found in
https://lore.kernel.org/all/CAFEAcA9OXi4v+hdBMamQv85HYp2EqxOA5=nfsdZ5E3nf8RP_pw@mail.gmail.com/
---
 scripts/update-aarch64-sysreg-code.py | 133 ++++++++++++++++++++++++++
 1 file changed, 133 insertions(+)
 create mode 100755 scripts/update-aarch64-sysreg-code.py

diff --git a/scripts/update-aarch64-sysreg-code.py b/scripts/update-aarch64-sysreg-code.py
new file mode 100755
index 0000000000..c7b31035d1
--- /dev/null
+++ b/scripts/update-aarch64-sysreg-code.py
@@ -0,0 +1,133 @@
+#!/usr/bin/env python3
+
+# This script takes as input the Registers.json file delivered in
+# the AARCHMRS Features Model downloadable from the Arm Developer
+# A-Profile Architecture Exploration Tools page:
+# https://developer.arm.com/Architectures/A-Profile%20Architecture#Downloads
+# and outputs the list of ID regs in target/arm/cpu-sysregs.h.inc
+# under the form of DEF(<name>, <op0>, <op1>, <crn>, <crm>, <op2>)
+#
+# Copyright (C) 2025 Red Hat, Inc.
+#
+# Authors: Eric Auger <eric.auger@redhat.com>
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+
+import json
+import os
+import sys
+
+# returns the int value of a given @opcode for a reg @encoding
+def get_opcode(encoding, opcode):
+    fvalue = encoding.get(opcode)
+    if fvalue:
+        value = fvalue.get('value')
+        if isinstance(value, str):
+            value = value.strip("'")
+            value = int(value,2)
+            return value
+    return -1
+
+def extract_idregs_from_registers_json(filename):
+    """
+    Load a Registers.json file and extract all ID registers, decode their
+    opcode and dump the information in target/arm/cpu-sysregs.h.inc
+
+    Args:
+        filename (str): The path to the Registers.json
+    returns:
+        idregs: list of ID regs and their encoding
+    """
+    if not os.path.exists(filename):
+        print(f"Error: {filename} could not be found!")
+        return {}
+
+    try:
+        with open(filename, 'r') as f:
+            register_data = json.load(f)
+
+    except json.JSONDecodeError:
+        print(f"Could not decode json from '{filename}'!")
+        return {}
+    except Exception as e:
+        print(f"Unexpected error while reading {filename}: {e}")
+        return {}
+
+    registers = [r for r in register_data if isinstance(r, dict) and \
+                r.get('_type') == 'Register']
+
+    idregs = {}
+
+    # Some regs have op code values like 000x, 001x. Anyway we don't need
+    # them. Besides some regs are undesired in the generated file such as
+    # CCSIDR_EL1 and CCSIDR2_EL1 which are arrays of regs. Also exclude
+    # VMPIDR_EL2 and VPIDR_EL2 which are outside of the IDreg scope we
+    # are interested in and are tricky to decode as their system accessor
+    # refer to MPIDR_EL1/MIDR_EL1 respectively
+
+    skiplist = ['ALLINT', 'PM', 'S1_', 'S3_', 'SVCR', \
+                'CCSIDR_EL1', 'CCSIDR2_EL1', 'VMPIDR_EL2', 'VPIDR_EL2']
+
+    for register in registers:
+        reg_name = register.get('name')
+
+        is_skipped = any(term in (reg_name or "").upper() for term in skiplist)
+
+        if reg_name and not is_skipped:
+            accessors = register.get('accessors', [])
+
+            for accessor in accessors:
+                type = accessor.get('_type')
+                if type in ['Accessors.SystemAccessor']:
+                    encoding_list = accessor.get('encoding')
+
+                    if isinstance(encoding_list, list) and encoding_list and \
+                       isinstance(encoding_list[0], dict):
+                        encoding_wrapper = encoding_list[0]
+                        encoding_source = encoding_wrapper.get('encodings', \
+                                                               encoding_wrapper)
+
+                        if isinstance(encoding_source, dict):
+                                op0 = get_opcode(encoding_source, 'op0')
+                                op1 = get_opcode(encoding_source, 'op1')
+                                op2 = get_opcode(encoding_source, 'op2')
+                                crn = get_opcode(encoding_source, 'CRn')
+                                crm = get_opcode(encoding_source, 'CRm')
+                                encoding_str=f"{op0} {op1} {crn} {crm} {op2}"
+
+                # ID regs are assumed within this scope
+                if op0 == 3 and (op1 == 0 or op1 == 1 or op1 == 3) and \
+                   crn == 0 and (crm >= 0 and crm <= 7) and (op2 >= 0 and op2 <= 7):
+                    idregs[reg_name] = encoding_str
+
+    return idregs
+
+if __name__ == "__main__":
+    # Single arg expectedr: the path to the Registers.json file
+    if len(sys.argv) < 2:
+        print("Usage: scripts/update-aarch64-sysreg-code.py <path_to_registers_json>")
+        sys.exit(1)
+    else:
+        json_file_path = sys.argv[1]
+
+    extracted_registers = extract_idregs_from_registers_json(json_file_path)
+
+    if extracted_registers:
+        output_list = extracted_registers.items()
+
+        # Sort by register name
+        sorted_output = sorted(output_list, key=lambda item: item[0])
+
+        # format lines as DEF(<name>, <op0>, <op1>, <crn>, <crm>, <op2>)
+        final_output = ""
+        for reg_name, encoding in sorted_output:
+            reformatted_encoding = encoding.replace(" ", ", ")
+            final_output += f"DEF({reg_name}, {reformatted_encoding})\n"
+
+        with open("target/arm/cpu-sysregs.h.inc", 'w') as f:
+            f.write("/* SPDX-License-Identifier: BSD-3-Clause */\n\n")
+            f.write("/* This file is autogenerated by ")
+            f.write("scripts/update-aarch64-sysreg-code.py */\n\n")
+            f.write(final_output)
+        print(f"updated target/arm/cpu-sysregs.h.inc")
-- 
2.52.0
Re: [PATCH 1/3] scripts: introduce scripts/update-aarch64-sysreg-code.py
Posted by Philippe Mathieu-Daudé 1 week ago
On 8/12/25 17:37, Eric Auger wrote:
> Introduce a script that takes as input the Registers.json file
> delivered in the AARCHMRS Features Model downloadable from the
> Arm Developer A-Profile Architecture Exploration Tools page:
> https://developer.arm.com/Architectures/A-Profile%20Architecture#Downloads
> and outputs the list of ID regs in target/arm/cpu-sysregs.h.inc
> under the form of DEF(<name>, <op0>, <op1>, <crn>, <crm>, <op2>).

Great idea!

> 
> We only care about IDregs with opcodes satisfying:
> op0 = 3, op1 within [0, 3], crn = 0, crm within [0, 7], op2 within [0, 7]
> 
> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> 
> ---
> 
> This was tested with https://developer.arm.com/-/cdn-downloads/permalink/Exploration-Tools-OS-Machine-Readable-Data/AARCHMRS_BSD/AARCHMRS_OPENSOURCE_A_profile_FAT-2025-09_ASL0.tar.gz
> 
> Discussion about undesired generated regs can be found in
> https://lore.kernel.org/all/CAFEAcA9OXi4v+hdBMamQv85HYp2EqxOA5=nfsdZ5E3nf8RP_pw@mail.gmail.com/
> ---
>   scripts/update-aarch64-sysreg-code.py | 133 ++++++++++++++++++++++++++
>   1 file changed, 133 insertions(+)
>   create mode 100755 scripts/update-aarch64-sysreg-code.py
> 
> diff --git a/scripts/update-aarch64-sysreg-code.py b/scripts/update-aarch64-sysreg-code.py
> new file mode 100755
> index 0000000000..c7b31035d1
> --- /dev/null
> +++ b/scripts/update-aarch64-sysreg-code.py
> @@ -0,0 +1,133 @@
> +#!/usr/bin/env python3
> +
> +# This script takes as input the Registers.json file delivered in
> +# the AARCHMRS Features Model downloadable from the Arm Developer
> +# A-Profile Architecture Exploration Tools page:
> +# https://developer.arm.com/Architectures/A-Profile%20Architecture#Downloads
> +# and outputs the list of ID regs in target/arm/cpu-sysregs.h.inc
> +# under the form of DEF(<name>, <op0>, <op1>, <crn>, <crm>, <op2>)
> +#
> +# Copyright (C) 2025 Red Hat, Inc.
> +#
> +# Authors: Eric Auger <eric.auger@redhat.com>
> +#
> +# SPDX-License-Identifier: GPL-2.0-or-later
> +
> +
> +import json
> +import os
> +import sys
> +

[*]

> +# returns the int value of a given @opcode for a reg @encoding
> +def get_opcode(encoding, opcode):
> +    fvalue = encoding.get(opcode)
> +    if fvalue:
> +        value = fvalue.get('value')
> +        if isinstance(value, str):
> +            value = value.strip("'")
> +            value = int(value,2)
> +            return value
> +    return -1
> +
> +def extract_idregs_from_registers_json(filename):
> +    """
> +    Load a Registers.json file and extract all ID registers, decode their
> +    opcode and dump the information in target/arm/cpu-sysregs.h.inc
> +
> +    Args:
> +        filename (str): The path to the Registers.json
> +    returns:
> +        idregs: list of ID regs and their encoding
> +    """
> +    if not os.path.exists(filename):
> +        print(f"Error: {filename} could not be found!")
> +        return {}
> +
> +    try:
> +        with open(filename, 'r') as f:
> +            register_data = json.load(f)
> +
> +    except json.JSONDecodeError:
> +        print(f"Could not decode json from '{filename}'!")
> +        return {}
> +    except Exception as e:
> +        print(f"Unexpected error while reading {filename}: {e}")
> +        return {}
> +
> +    registers = [r for r in register_data if isinstance(r, dict) and \
> +                r.get('_type') == 'Register']
> +
> +    idregs = {}
> +
> +    # Some regs have op code values like 000x, 001x. Anyway we don't need
> +    # them. Besides some regs are undesired in the generated file such as
> +    # CCSIDR_EL1 and CCSIDR2_EL1 which are arrays of regs. Also exclude
> +    # VMPIDR_EL2 and VPIDR_EL2 which are outside of the IDreg scope we
> +    # are interested in and are tricky to decode as their system accessor
> +    # refer to MPIDR_EL1/MIDR_EL1 respectively
> +
> +    skiplist = ['ALLINT', 'PM', 'S1_', 'S3_', 'SVCR', \
> +                'CCSIDR_EL1', 'CCSIDR2_EL1', 'VMPIDR_EL2', 'VPIDR_EL2']

Since we might have to update this array, I'd move it (and the big
comment preceding) in [*].

> +
> +    for register in registers:
> +        reg_name = register.get('name')
> +
> +        is_skipped = any(term in (reg_name or "").upper() for term in skiplist)
> +
> +        if reg_name and not is_skipped:
> +            accessors = register.get('accessors', [])
> +
> +            for accessor in accessors:
> +                type = accessor.get('_type')
> +                if type in ['Accessors.SystemAccessor']:
> +                    encoding_list = accessor.get('encoding')
> +
> +                    if isinstance(encoding_list, list) and encoding_list and \
> +                       isinstance(encoding_list[0], dict):
> +                        encoding_wrapper = encoding_list[0]
> +                        encoding_source = encoding_wrapper.get('encodings', \
> +                                                               encoding_wrapper)
> +
> +                        if isinstance(encoding_source, dict):
> +                                op0 = get_opcode(encoding_source, 'op0')
> +                                op1 = get_opcode(encoding_source, 'op1')
> +                                op2 = get_opcode(encoding_source, 'op2')
> +                                crn = get_opcode(encoding_source, 'CRn')
> +                                crm = get_opcode(encoding_source, 'CRm')
> +                                encoding_str=f"{op0} {op1} {crn} {crm} {op2}"
> +
> +                # ID regs are assumed within this scope
> +                if op0 == 3 and (op1 == 0 or op1 == 1 or op1 == 3) and \
> +                   crn == 0 and (crm >= 0 and crm <= 7) and (op2 >= 0 and op2 <= 7):
> +                    idregs[reg_name] = encoding_str
> +
> +    return idregs
> +
> +if __name__ == "__main__":
> +    # Single arg expectedr: the path to the Registers.json file

Typo "expectedr".

> +    if len(sys.argv) < 2:
> +        print("Usage: scripts/update-aarch64-sysreg-code.py <path_to_registers_json>")
> +        sys.exit(1)
> +    else:
> +        json_file_path = sys.argv[1]
> +
> +    extracted_registers = extract_idregs_from_registers_json(json_file_path)
> +
> +    if extracted_registers:
> +        output_list = extracted_registers.items()
> +
> +        # Sort by register name
> +        sorted_output = sorted(output_list, key=lambda item: item[0])
> +
> +        # format lines as DEF(<name>, <op0>, <op1>, <crn>, <crm>, <op2>)
> +        final_output = ""
> +        for reg_name, encoding in sorted_output:
> +            reformatted_encoding = encoding.replace(" ", ", ")
> +            final_output += f"DEF({reg_name}, {reformatted_encoding})\n"
> +
> +        with open("target/arm/cpu-sysregs.h.inc", 'w') as f:
> +            f.write("/* SPDX-License-Identifier: BSD-3-Clause */\n\n")
> +            f.write("/* This file is autogenerated by ")
> +            f.write("scripts/update-aarch64-sysreg-code.py */\n\n")
> +            f.write(final_output)
> +        print(f"updated target/arm/cpu-sysregs.h.inc")

Fixed string (no formating) so no need for f- prefix.

Patch LGTM but it should have some unit test.
Re: [PATCH 1/3] scripts: introduce scripts/update-aarch64-sysreg-code.py
Posted by Eric Auger 1 week ago
Hi Philippe,

On 12/9/25 1:30 PM, Philippe Mathieu-Daudé wrote:
> On 8/12/25 17:37, Eric Auger wrote:
>> Introduce a script that takes as input the Registers.json file
>> delivered in the AARCHMRS Features Model downloadable from the
>> Arm Developer A-Profile Architecture Exploration Tools page:
>> https://developer.arm.com/Architectures/A-Profile%20Architecture#Downloads
>>
>> and outputs the list of ID regs in target/arm/cpu-sysregs.h.inc
>> under the form of DEF(<name>, <op0>, <op1>, <crn>, <crm>, <op2>).
>
> Great idea!
>
>>
>> We only care about IDregs with opcodes satisfying:
>> op0 = 3, op1 within [0, 3], crn = 0, crm within [0, 7], op2 within
>> [0, 7]
>>
>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>
>> ---
>>
>> This was tested with
>> https://developer.arm.com/-/cdn-downloads/permalink/Exploration-Tools-OS-Machine-Readable-Data/AARCHMRS_BSD/AARCHMRS_OPENSOURCE_A_profile_FAT-2025-09_ASL0.tar.gz
>>
>> Discussion about undesired generated regs can be found in
>> https://lore.kernel.org/all/CAFEAcA9OXi4v+hdBMamQv85HYp2EqxOA5=nfsdZ5E3nf8RP_pw@mail.gmail.com/
>>
>> ---
>>   scripts/update-aarch64-sysreg-code.py | 133 ++++++++++++++++++++++++++
>>   1 file changed, 133 insertions(+)
>>   create mode 100755 scripts/update-aarch64-sysreg-code.py
>>
>> diff --git a/scripts/update-aarch64-sysreg-code.py
>> b/scripts/update-aarch64-sysreg-code.py
>> new file mode 100755
>> index 0000000000..c7b31035d1
>> --- /dev/null
>> +++ b/scripts/update-aarch64-sysreg-code.py
>> @@ -0,0 +1,133 @@
>> +#!/usr/bin/env python3
>> +
>> +# This script takes as input the Registers.json file delivered in
>> +# the AARCHMRS Features Model downloadable from the Arm Developer
>> +# A-Profile Architecture Exploration Tools page:
>> +#
>> https://developer.arm.com/Architectures/A-Profile%20Architecture#Downloads
>> +# and outputs the list of ID regs in target/arm/cpu-sysregs.h.inc
>> +# under the form of DEF(<name>, <op0>, <op1>, <crn>, <crm>, <op2>)
>> +#
>> +# Copyright (C) 2025 Red Hat, Inc.
>> +#
>> +# Authors: Eric Auger <eric.auger@redhat.com>
>> +#
>> +# SPDX-License-Identifier: GPL-2.0-or-later
>> +
>> +
>> +import json
>> +import os
>> +import sys
>> +
>
> [*]
>
>> +# returns the int value of a given @opcode for a reg @encoding
>> +def get_opcode(encoding, opcode):
>> +    fvalue = encoding.get(opcode)
>> +    if fvalue:
>> +        value = fvalue.get('value')
>> +        if isinstance(value, str):
>> +            value = value.strip("'")
>> +            value = int(value,2)
>> +            return value
>> +    return -1
>> +
>> +def extract_idregs_from_registers_json(filename):
>> +    """
>> +    Load a Registers.json file and extract all ID registers, decode
>> their
>> +    opcode and dump the information in target/arm/cpu-sysregs.h.inc
>> +
>> +    Args:
>> +        filename (str): The path to the Registers.json
>> +    returns:
>> +        idregs: list of ID regs and their encoding
>> +    """
>> +    if not os.path.exists(filename):
>> +        print(f"Error: {filename} could not be found!")
>> +        return {}
>> +
>> +    try:
>> +        with open(filename, 'r') as f:
>> +            register_data = json.load(f)
>> +
>> +    except json.JSONDecodeError:
>> +        print(f"Could not decode json from '{filename}'!")
>> +        return {}
>> +    except Exception as e:
>> +        print(f"Unexpected error while reading {filename}: {e}")
>> +        return {}
>> +
>> +    registers = [r for r in register_data if isinstance(r, dict) and \
>> +                r.get('_type') == 'Register']
>> +
>> +    idregs = {}
>> +
>> +    # Some regs have op code values like 000x, 001x. Anyway we don't
>> need
>> +    # them. Besides some regs are undesired in the generated file
>> such as
>> +    # CCSIDR_EL1 and CCSIDR2_EL1 which are arrays of regs. Also exclude
>> +    # VMPIDR_EL2 and VPIDR_EL2 which are outside of the IDreg scope we
>> +    # are interested in and are tricky to decode as their system
>> accessor
>> +    # refer to MPIDR_EL1/MIDR_EL1 respectively
>> +
>> +    skiplist = ['ALLINT', 'PM', 'S1_', 'S3_', 'SVCR', \
>> +                'CCSIDR_EL1', 'CCSIDR2_EL1', 'VMPIDR_EL2', 'VPIDR_EL2']
>
> Since we might have to update this array, I'd move it (and the big
> comment preceding) in [*].
>
>> +
>> +    for register in registers:
>> +        reg_name = register.get('name')
>> +
>> +        is_skipped = any(term in (reg_name or "").upper() for term
>> in skiplist)
>> +
>> +        if reg_name and not is_skipped:
>> +            accessors = register.get('accessors', [])
>> +
>> +            for accessor in accessors:
>> +                type = accessor.get('_type')
>> +                if type in ['Accessors.SystemAccessor']:
>> +                    encoding_list = accessor.get('encoding')
>> +
>> +                    if isinstance(encoding_list, list) and
>> encoding_list and \
>> +                       isinstance(encoding_list[0], dict):
>> +                        encoding_wrapper = encoding_list[0]
>> +                        encoding_source =
>> encoding_wrapper.get('encodings', \
>> +                                                              
>> encoding_wrapper)
>> +
>> +                        if isinstance(encoding_source, dict):
>> +                                op0 = get_opcode(encoding_source,
>> 'op0')
>> +                                op1 = get_opcode(encoding_source,
>> 'op1')
>> +                                op2 = get_opcode(encoding_source,
>> 'op2')
>> +                                crn = get_opcode(encoding_source,
>> 'CRn')
>> +                                crm = get_opcode(encoding_source,
>> 'CRm')
>> +                                encoding_str=f"{op0} {op1} {crn}
>> {crm} {op2}"
>> +
>> +                # ID regs are assumed within this scope
>> +                if op0 == 3 and (op1 == 0 or op1 == 1 or op1 == 3)
>> and \
>> +                   crn == 0 and (crm >= 0 and crm <= 7) and (op2 >=
>> 0 and op2 <= 7):
>> +                    idregs[reg_name] = encoding_str
>> +
>> +    return idregs
>> +
>> +if __name__ == "__main__":
>> +    # Single arg expectedr: the path to the Registers.json file
>
> Typo "expectedr".
>
>> +    if len(sys.argv) < 2:
>> +        print("Usage: scripts/update-aarch64-sysreg-code.py
>> <path_to_registers_json>")
>> +        sys.exit(1)
>> +    else:
>> +        json_file_path = sys.argv[1]
>> +
>> +    extracted_registers =
>> extract_idregs_from_registers_json(json_file_path)
>> +
>> +    if extracted_registers:
>> +        output_list = extracted_registers.items()
>> +
>> +        # Sort by register name
>> +        sorted_output = sorted(output_list, key=lambda item: item[0])
>> +
>> +        # format lines as DEF(<name>, <op0>, <op1>, <crn>, <crm>,
>> <op2>)
>> +        final_output = ""
>> +        for reg_name, encoding in sorted_output:
>> +            reformatted_encoding = encoding.replace(" ", ", ")
>> +            final_output += f"DEF({reg_name},
>> {reformatted_encoding})\n"
>> +
>> +        with open("target/arm/cpu-sysregs.h.inc", 'w') as f:
>> +            f.write("/* SPDX-License-Identifier: BSD-3-Clause */\n\n")
>> +            f.write("/* This file is autogenerated by ")
>> +            f.write("scripts/update-aarch64-sysreg-code.py */\n\n")
>> +            f.write(final_output)
>> +        print(f"updated target/arm/cpu-sysregs.h.inc")
>
> Fixed string (no formating) so no need for f- prefix.
>
> Patch LGTM but it should have some unit test. 

thank you for the review.

Not sure what you mean by unit test? One solution could be to diff the
result with former bash/awk I used to generation previously [1] but i
guess it would be awkward to upstream the awk script we did not want in
the first place. Otherwise we could check some random opcodes but it
wouldn't mean the others are correct. To me the best way to validate the
python script is to do [1] once but do not upstream that. Reviewing the
new generated files against the previous content [3/3] is the best way
to test the result. I mean using auto generation does not prevent from
reviewing the generated stuff, especially because the generation is
triggered manually as scripts/update_linux_headers.sh and should not
happen very frequenty.

Thoughts?

Eric  


Re: [PATCH 1/3] scripts: introduce scripts/update-aarch64-sysreg-code.py
Posted by Philippe Mathieu-Daudé 1 week ago
On 9/12/25 14:40, Eric Auger wrote:
> Hi Philippe,
> 
> On 12/9/25 1:30 PM, Philippe Mathieu-Daudé wrote:
>> On 8/12/25 17:37, Eric Auger wrote:
>>> Introduce a script that takes as input the Registers.json file
>>> delivered in the AARCHMRS Features Model downloadable from the
>>> Arm Developer A-Profile Architecture Exploration Tools page:
>>> https://developer.arm.com/Architectures/A-Profile%20Architecture#Downloads
>>>
>>> and outputs the list of ID regs in target/arm/cpu-sysregs.h.inc
>>> under the form of DEF(<name>, <op0>, <op1>, <crn>, <crm>, <op2>).
>>
>> Great idea!
>>
>>>
>>> We only care about IDregs with opcodes satisfying:
>>> op0 = 3, op1 within [0, 3], crn = 0, crm within [0, 7], op2 within
>>> [0, 7]
>>>
>>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>>
>>> ---
>>>
>>> This was tested with
>>> https://developer.arm.com/-/cdn-downloads/permalink/Exploration-Tools-OS-Machine-Readable-Data/AARCHMRS_BSD/AARCHMRS_OPENSOURCE_A_profile_FAT-2025-09_ASL0.tar.gz
>>>
>>> Discussion about undesired generated regs can be found in
>>> https://lore.kernel.org/all/CAFEAcA9OXi4v+hdBMamQv85HYp2EqxOA5=nfsdZ5E3nf8RP_pw@mail.gmail.com/
>>>
>>> ---
>>>    scripts/update-aarch64-sysreg-code.py | 133 ++++++++++++++++++++++++++
>>>    1 file changed, 133 insertions(+)
>>>    create mode 100755 scripts/update-aarch64-sysreg-code.py
>>>
>>> diff --git a/scripts/update-aarch64-sysreg-code.py
>>> b/scripts/update-aarch64-sysreg-code.py
>>> new file mode 100755
>>> index 0000000000..c7b31035d1
>>> --- /dev/null
>>> +++ b/scripts/update-aarch64-sysreg-code.py
>>> @@ -0,0 +1,133 @@
>>> +#!/usr/bin/env python3
>>> +
>>> +# This script takes as input the Registers.json file delivered in
>>> +# the AARCHMRS Features Model downloadable from the Arm Developer
>>> +# A-Profile Architecture Exploration Tools page:
>>> +#
>>> https://developer.arm.com/Architectures/A-Profile%20Architecture#Downloads
>>> +# and outputs the list of ID regs in target/arm/cpu-sysregs.h.inc
>>> +# under the form of DEF(<name>, <op0>, <op1>, <crn>, <crm>, <op2>)
>>> +#
>>> +# Copyright (C) 2025 Red Hat, Inc.
>>> +#
>>> +# Authors: Eric Auger <eric.auger@redhat.com>
>>> +#
>>> +# SPDX-License-Identifier: GPL-2.0-or-later
>>> +
>>> +
>>> +import json
>>> +import os
>>> +import sys
>>> +
>>
>> [*]
>>
>>> +# returns the int value of a given @opcode for a reg @encoding
>>> +def get_opcode(encoding, opcode):
>>> +    fvalue = encoding.get(opcode)
>>> +    if fvalue:
>>> +        value = fvalue.get('value')
>>> +        if isinstance(value, str):
>>> +            value = value.strip("'")
>>> +            value = int(value,2)
>>> +            return value
>>> +    return -1
>>> +
>>> +def extract_idregs_from_registers_json(filename):
>>> +    """
>>> +    Load a Registers.json file and extract all ID registers, decode
>>> their
>>> +    opcode and dump the information in target/arm/cpu-sysregs.h.inc
>>> +
>>> +    Args:
>>> +        filename (str): The path to the Registers.json
>>> +    returns:
>>> +        idregs: list of ID regs and their encoding
>>> +    """
>>> +    if not os.path.exists(filename):
>>> +        print(f"Error: {filename} could not be found!")
>>> +        return {}
>>> +
>>> +    try:
>>> +        with open(filename, 'r') as f:
>>> +            register_data = json.load(f)
>>> +
>>> +    except json.JSONDecodeError:
>>> +        print(f"Could not decode json from '{filename}'!")
>>> +        return {}
>>> +    except Exception as e:
>>> +        print(f"Unexpected error while reading {filename}: {e}")
>>> +        return {}
>>> +
>>> +    registers = [r for r in register_data if isinstance(r, dict) and \
>>> +                r.get('_type') == 'Register']
>>> +
>>> +    idregs = {}
>>> +
>>> +    # Some regs have op code values like 000x, 001x. Anyway we don't
>>> need
>>> +    # them. Besides some regs are undesired in the generated file
>>> such as
>>> +    # CCSIDR_EL1 and CCSIDR2_EL1 which are arrays of regs. Also exclude
>>> +    # VMPIDR_EL2 and VPIDR_EL2 which are outside of the IDreg scope we
>>> +    # are interested in and are tricky to decode as their system
>>> accessor
>>> +    # refer to MPIDR_EL1/MIDR_EL1 respectively
>>> +
>>> +    skiplist = ['ALLINT', 'PM', 'S1_', 'S3_', 'SVCR', \
>>> +                'CCSIDR_EL1', 'CCSIDR2_EL1', 'VMPIDR_EL2', 'VPIDR_EL2']
>>
>> Since we might have to update this array, I'd move it (and the big
>> comment preceding) in [*].
>>
>>> +
>>> +    for register in registers:
>>> +        reg_name = register.get('name')
>>> +
>>> +        is_skipped = any(term in (reg_name or "").upper() for term
>>> in skiplist)
>>> +
>>> +        if reg_name and not is_skipped:
>>> +            accessors = register.get('accessors', [])
>>> +
>>> +            for accessor in accessors:
>>> +                type = accessor.get('_type')
>>> +                if type in ['Accessors.SystemAccessor']:
>>> +                    encoding_list = accessor.get('encoding')
>>> +
>>> +                    if isinstance(encoding_list, list) and
>>> encoding_list and \
>>> +                       isinstance(encoding_list[0], dict):
>>> +                        encoding_wrapper = encoding_list[0]
>>> +                        encoding_source =
>>> encoding_wrapper.get('encodings', \
>>> +
>>> encoding_wrapper)
>>> +
>>> +                        if isinstance(encoding_source, dict):
>>> +                                op0 = get_opcode(encoding_source,
>>> 'op0')
>>> +                                op1 = get_opcode(encoding_source,
>>> 'op1')
>>> +                                op2 = get_opcode(encoding_source,
>>> 'op2')
>>> +                                crn = get_opcode(encoding_source,
>>> 'CRn')
>>> +                                crm = get_opcode(encoding_source,
>>> 'CRm')
>>> +                                encoding_str=f"{op0} {op1} {crn}
>>> {crm} {op2}"
>>> +
>>> +                # ID regs are assumed within this scope
>>> +                if op0 == 3 and (op1 == 0 or op1 == 1 or op1 == 3)
>>> and \
>>> +                   crn == 0 and (crm >= 0 and crm <= 7) and (op2 >=
>>> 0 and op2 <= 7):
>>> +                    idregs[reg_name] = encoding_str
>>> +
>>> +    return idregs
>>> +
>>> +if __name__ == "__main__":
>>> +    # Single arg expectedr: the path to the Registers.json file
>>
>> Typo "expectedr".
>>
>>> +    if len(sys.argv) < 2:
>>> +        print("Usage: scripts/update-aarch64-sysreg-code.py
>>> <path_to_registers_json>")
>>> +        sys.exit(1)
>>> +    else:
>>> +        json_file_path = sys.argv[1]
>>> +
>>> +    extracted_registers =
>>> extract_idregs_from_registers_json(json_file_path)
>>> +
>>> +    if extracted_registers:
>>> +        output_list = extracted_registers.items()
>>> +
>>> +        # Sort by register name
>>> +        sorted_output = sorted(output_list, key=lambda item: item[0])
>>> +
>>> +        # format lines as DEF(<name>, <op0>, <op1>, <crn>, <crm>,
>>> <op2>)
>>> +        final_output = ""
>>> +        for reg_name, encoding in sorted_output:
>>> +            reformatted_encoding = encoding.replace(" ", ", ")
>>> +            final_output += f"DEF({reg_name},
>>> {reformatted_encoding})\n"
>>> +
>>> +        with open("target/arm/cpu-sysregs.h.inc", 'w') as f:
>>> +            f.write("/* SPDX-License-Identifier: BSD-3-Clause */\n\n")
>>> +            f.write("/* This file is autogenerated by ")
>>> +            f.write("scripts/update-aarch64-sysreg-code.py */\n\n")
>>> +            f.write(final_output)
>>> +        print(f"updated target/arm/cpu-sysregs.h.inc")
>>
>> Fixed string (no formating) so no need for f- prefix.
>>
>> Patch LGTM but it should have some unit test.
> 
> thank you for the review.
> 
> Not sure what you mean by unit test? One solution could be to diff the
> result with former bash/awk I used to generation previously [1] but i
> guess it would be awkward to upstream the awk script we did not want in
> the first place. Otherwise we could check some random opcodes but it
> wouldn't mean the others are correct. To me the best way to validate the
> python script is to do [1] once but do not upstream that. Reviewing the
> new generated files against the previous content [3/3] is the best way
> to test the result. I mean using auto generation does not prevent from
> reviewing the generated stuff, especially because the generation is
> triggered manually as scripts/update_linux_headers.sh and should not
> happen very frequenty.
> 
> Thoughts?

I was thinking something minimal like what we use for decodetree,
see these 2 files:

  - tests/decode/meson.build
  - tests/decode/err_field10.decode

Re: [PATCH 1/3] scripts: introduce scripts/update-aarch64-sysreg-code.py
Posted by Philippe Mathieu-Daudé 1 week ago
On 9/12/25 13:30, Philippe Mathieu-Daudé wrote:
> On 8/12/25 17:37, Eric Auger wrote:
>> Introduce a script that takes as input the Registers.json file
>> delivered in the AARCHMRS Features Model downloadable from the
>> Arm Developer A-Profile Architecture Exploration Tools page:
>> https://developer.arm.com/Architectures/A- 
>> Profile%20Architecture#Downloads
>> and outputs the list of ID regs in target/arm/cpu-sysregs.h.inc
>> under the form of DEF(<name>, <op0>, <op1>, <crn>, <crm>, <op2>).
> 
> Great idea!
> 
>>
>> We only care about IDregs with opcodes satisfying:
>> op0 = 3, op1 within [0, 3], crn = 0, crm within [0, 7], op2 within [0, 7]
>>
>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>
>> ---
>>
>> This was tested with https://developer.arm.com/-/cdn-downloads/ 
>> permalink/Exploration-Tools-OS-Machine-Readable-Data/AARCHMRS_BSD/ 
>> AARCHMRS_OPENSOURCE_A_profile_FAT-2025-09_ASL0.tar.gz
>>
>> Discussion about undesired generated regs can be found in
>> https://lore.kernel.org/all/ 
>> CAFEAcA9OXi4v+hdBMamQv85HYp2EqxOA5=nfsdZ5E3nf8RP_pw@mail.gmail.com/
>> ---
>>   scripts/update-aarch64-sysreg-code.py | 133 ++++++++++++++++++++++++++
>>   1 file changed, 133 insertions(+)
>>   create mode 100755 scripts/update-aarch64-sysreg-code.py
>>
>> diff --git a/scripts/update-aarch64-sysreg-code.py b/scripts/update- 
>> aarch64-sysreg-code.py
>> new file mode 100755
>> index 0000000000..c7b31035d1
>> --- /dev/null
>> +++ b/scripts/update-aarch64-sysreg-code.py
>> @@ -0,0 +1,133 @@
>> +#!/usr/bin/env python3
>> +
>> +# This script takes as input the Registers.json file delivered in
>> +# the AARCHMRS Features Model downloadable from the Arm Developer
>> +# A-Profile Architecture Exploration Tools page:
>> +# https://developer.arm.com/Architectures/A- 
>> Profile%20Architecture#Downloads
>> +# and outputs the list of ID regs in target/arm/cpu-sysregs.h.inc
>> +# under the form of DEF(<name>, <op0>, <op1>, <crn>, <crm>, <op2>)
>> +#
>> +# Copyright (C) 2025 Red Hat, Inc.
>> +#
>> +# Authors: Eric Auger <eric.auger@redhat.com>
>> +#
>> +# SPDX-License-Identifier: GPL-2.0-or-later
>> +
>> +
>> +import json
>> +import os
>> +import sys
>> +
> 
> [*]
> 
>> +# returns the int value of a given @opcode for a reg @encoding
>> +def get_opcode(encoding, opcode):
>> +    fvalue = encoding.get(opcode)
>> +    if fvalue:
>> +        value = fvalue.get('value')
>> +        if isinstance(value, str):
>> +            value = value.strip("'")
>> +            value = int(value,2)
>> +            return value
>> +    return -1
>> +
>> +def extract_idregs_from_registers_json(filename):
>> +    """
>> +    Load a Registers.json file and extract all ID registers, decode 
>> their
>> +    opcode and dump the information in target/arm/cpu-sysregs.h.inc
>> +
>> +    Args:
>> +        filename (str): The path to the Registers.json
>> +    returns:
>> +        idregs: list of ID regs and their encoding
>> +    """
>> +    if not os.path.exists(filename):
>> +        print(f"Error: {filename} could not be found!")
>> +        return {}
>> +
>> +    try:
>> +        with open(filename, 'r') as f:
>> +            register_data = json.load(f)
>> +
>> +    except json.JSONDecodeError:
>> +        print(f"Could not decode json from '{filename}'!")
>> +        return {}
>> +    except Exception as e:
>> +        print(f"Unexpected error while reading {filename}: {e}")
>> +        return {}
>> +
>> +    registers = [r for r in register_data if isinstance(r, dict) and \
>> +                r.get('_type') == 'Register']
>> +
>> +    idregs = {}
>> +
>> +    # Some regs have op code values like 000x, 001x. Anyway we don't 
>> need
>> +    # them. Besides some regs are undesired in the generated file 
>> such as
>> +    # CCSIDR_EL1 and CCSIDR2_EL1 which are arrays of regs. Also exclude
>> +    # VMPIDR_EL2 and VPIDR_EL2 which are outside of the IDreg scope we
>> +    # are interested in and are tricky to decode as their system 
>> accessor
>> +    # refer to MPIDR_EL1/MIDR_EL1 respectively
>> +
>> +    skiplist = ['ALLINT', 'PM', 'S1_', 'S3_', 'SVCR', \
>> +                'CCSIDR_EL1', 'CCSIDR2_EL1', 'VMPIDR_EL2', 'VPIDR_EL2']
> 
> Since we might have to update this array, I'd move it (and the big
> comment preceding) in [*].
> 
>> +
>> +    for register in registers:
>> +        reg_name = register.get('name')
>> +
>> +        is_skipped = any(term in (reg_name or "").upper() for term in 
>> skiplist)
>> +
>> +        if reg_name and not is_skipped:
>> +            accessors = register.get('accessors', [])
>> +
>> +            for accessor in accessors:
>> +                type = accessor.get('_type')
>> +                if type in ['Accessors.SystemAccessor']:
>> +                    encoding_list = accessor.get('encoding')
>> +
>> +                    if isinstance(encoding_list, list) and 
>> encoding_list and \
>> +                       isinstance(encoding_list[0], dict):
>> +                        encoding_wrapper = encoding_list[0]
>> +                        encoding_source = 
>> encoding_wrapper.get('encodings', \
>> +                                                               
>> encoding_wrapper)
>> +
>> +                        if isinstance(encoding_source, dict):
>> +                                op0 = get_opcode(encoding_source, 'op0')
>> +                                op1 = get_opcode(encoding_source, 'op1')
>> +                                op2 = get_opcode(encoding_source, 'op2')
>> +                                crn = get_opcode(encoding_source, 'CRn')
>> +                                crm = get_opcode(encoding_source, 'CRm')
>> +                                encoding_str=f"{op0} {op1} {crn} 
>> {crm} {op2}"
>> +
>> +                # ID regs are assumed within this scope
>> +                if op0 == 3 and (op1 == 0 or op1 == 1 or op1 == 3) and \
>> +                   crn == 0 and (crm >= 0 and crm <= 7) and (op2 >= 0 
>> and op2 <= 7):
>> +                    idregs[reg_name] = encoding_str
>> +
>> +    return idregs
>> +
>> +if __name__ == "__main__":
>> +    # Single arg expectedr: the path to the Registers.json file
> 
> Typo "expectedr".
> 
>> +    if len(sys.argv) < 2:
>> +        print("Usage: scripts/update-aarch64-sysreg-code.py 
>> <path_to_registers_json>")
>> +        sys.exit(1)
>> +    else:
>> +        json_file_path = sys.argv[1]
>> +
>> +    extracted_registers = 
>> extract_idregs_from_registers_json(json_file_path)
>> +
>> +    if extracted_registers:
>> +        output_list = extracted_registers.items()
>> +
>> +        # Sort by register name
>> +        sorted_output = sorted(output_list, key=lambda item: item[0])
>> +
>> +        # format lines as DEF(<name>, <op0>, <op1>, <crn>, <crm>, <op2>)
>> +        final_output = ""
>> +        for reg_name, encoding in sorted_output:
>> +            reformatted_encoding = encoding.replace(" ", ", ")
>> +            final_output += f"DEF({reg_name}, {reformatted_encoding})\n"
>> +
>> +        with open("target/arm/cpu-sysregs.h.inc", 'w') as f:
>> +            f.write("/* SPDX-License-Identifier: BSD-3-Clause */\n\n")
>> +            f.write("/* This file is autogenerated by ")
>> +            f.write("scripts/update-aarch64-sysreg-code.py */\n\n")

Maybe worth adding the format in header:

      f.write("/* DEF(<name>, <op0>, <op1>, <crn>, <crm>, <op2>) */\n")

>> +            f.write(final_output)
>> +        print(f"updated target/arm/cpu-sysregs.h.inc")
> 
> Fixed string (no formating) so no need for f- prefix.
> 
> Patch LGTM but it should have some unit test.


Re: [PATCH 1/3] scripts: introduce scripts/update-aarch64-sysreg-code.py
Posted by Cornelia Huck 1 week ago
On Mon, Dec 08 2025, Eric Auger <eric.auger@redhat.com> wrote:

> Introduce a script that takes as input the Registers.json file
> delivered in the AARCHMRS Features Model downloadable from the
> Arm Developer A-Profile Architecture Exploration Tools page:
> https://developer.arm.com/Architectures/A-Profile%20Architecture#Downloads
> and outputs the list of ID regs in target/arm/cpu-sysregs.h.inc
> under the form of DEF(<name>, <op0>, <op1>, <crn>, <crm>, <op2>).
>
> We only care about IDregs with opcodes satisfying:
> op0 = 3, op1 within [0, 3], crn = 0, crm within [0, 7], op2 within [0, 7]
>
> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>
> ---
>
> This was tested with https://developer.arm.com/-/cdn-downloads/permalink/Exploration-Tools-OS-Machine-Readable-Data/AARCHMRS_BSD/AARCHMRS_OPENSOURCE_A_profile_FAT-2025-09_ASL0.tar.gz
>
> Discussion about undesired generated regs can be found in
> https://lore.kernel.org/all/CAFEAcA9OXi4v+hdBMamQv85HYp2EqxOA5=nfsdZ5E3nf8RP_pw@mail.gmail.com/
> ---
>  scripts/update-aarch64-sysreg-code.py | 133 ++++++++++++++++++++++++++
>  1 file changed, 133 insertions(+)
>  create mode 100755 scripts/update-aarch64-sysreg-code.py
>
> diff --git a/scripts/update-aarch64-sysreg-code.py b/scripts/update-aarch64-sysreg-code.py
> new file mode 100755
> index 0000000000..c7b31035d1
> --- /dev/null
> +++ b/scripts/update-aarch64-sysreg-code.py

(...)

> +if __name__ == "__main__":
> +    # Single arg expectedr: the path to the Registers.json file
> +    if len(sys.argv) < 2:
> +        print("Usage: scripts/update-aarch64-sysreg-code.py <path_to_registers_json>")
> +        sys.exit(1)
> +    else:
> +        json_file_path = sys.argv[1]
> +
> +    extracted_registers = extract_idregs_from_registers_json(json_file_path)
> +
> +    if extracted_registers:
> +        output_list = extracted_registers.items()
> +
> +        # Sort by register name
> +        sorted_output = sorted(output_list, key=lambda item: item[0])
> +
> +        # format lines as DEF(<name>, <op0>, <op1>, <crn>, <crm>, <op2>)
> +        final_output = ""
> +        for reg_name, encoding in sorted_output:
> +            reformatted_encoding = encoding.replace(" ", ", ")
> +            final_output += f"DEF({reg_name}, {reformatted_encoding})\n"
> +
> +        with open("target/arm/cpu-sysregs.h.inc", 'w') as f:
> +            f.write("/* SPDX-License-Identifier: BSD-3-Clause */\n\n")
> +            f.write("/* This file is autogenerated by ")
> +            f.write("scripts/update-aarch64-sysreg-code.py */\n\n")

I'm wondering if there is an easy way to log the version of the json
file this has been generated against. If not, putting the information
into the commit message when updating is probably sufficient.

> +            f.write(final_output)
> +        print(f"updated target/arm/cpu-sysregs.h.inc")