[PATCH v3 4/4] x86-disas: add x86-mini disassembler implementation

Michael Clark posted 4 patches 6 months ago
Maintainers: John Snow <jsnow@redhat.com>, Cleber Rosa <crosa@redhat.com>, Paolo Bonzini <pbonzini@redhat.com>, Zhao Liu <zhao1.liu@intel.com>
There is a newer version of this series
[PATCH v3 4/4] x86-disas: add x86-mini disassembler implementation
Posted by Michael Clark 6 months ago
The x86-mini library is a lightweight x86 encoder, decoder, and
disassembler that uses extensions to the Intel instruction set
metadata to encode modern VEX/EVEX instructions and legacy
instructions with a parameterized LEX (legacy extension) format.

This patch adds the x86-tablegen.py script, the disassembler,
a print_insn_x86 implementation, and host and target CPU stubs.

Signed-off-by: Michael Clark <michael@anarch128.org>
---
 disas/disas-host.c      |    5 +
 disas/meson.build       |   97 ++
 disas/x86-core.c        | 2716 +++++++++++++++++++++++++++++++++++++++
 disas/x86-disas.c       |   96 ++
 disas/x86.h             | 1860 +++++++++++++++++++++++++++
 include/disas/dis-asm.h |    1 +
 target/i386/cpu.c       |    7 +
 7 files changed, 4782 insertions(+)
 create mode 100644 disas/x86-core.c
 create mode 100644 disas/x86-disas.c
 create mode 100644 disas/x86.h

diff --git a/disas/disas-host.c b/disas/disas-host.c
index 8146fafe804c..e2d171464a87 100644
--- a/disas/disas-host.c
+++ b/disas/disas-host.c
@@ -6,6 +6,7 @@
 #include "qemu/osdep.h"
 #include "disas/disas.h"
 #include "disas/capstone.h"
+#include "disas/x86.h"
 #include "disas-internal.h"
 
 
@@ -50,12 +51,16 @@ static void initialize_debug_host(CPUDebug *s)
     s->info.cap_mode = CS_MODE_32;
     s->info.cap_insn_unit = 1;
     s->info.cap_insn_split = 8;
+    s->info.private_data = x86_ctx_create(x86_modes_32);
+    s->info.print_insn = print_insn_x86;
 #elif defined(__x86_64__)
     s->info.mach = bfd_mach_x86_64;
     s->info.cap_arch = CS_ARCH_X86;
     s->info.cap_mode = CS_MODE_64;
     s->info.cap_insn_unit = 1;
     s->info.cap_insn_split = 8;
+    s->info.private_data = x86_ctx_create(x86_modes_64);
+    s->info.print_insn = print_insn_x86;
 #elif defined(_ARCH_PPC)
     s->info.cap_arch = CS_ARCH_PPC;
 # ifdef _ARCH_PPC64
diff --git a/disas/meson.build b/disas/meson.build
index bbfa11978352..dca926a00987 100644
--- a/disas/meson.build
+++ b/disas/meson.build
@@ -9,6 +9,103 @@ common_ss.add(when: 'CONFIG_RISCV_DIS', if_true: files(
     'riscv-xthead.c',
     'riscv-xventana.c'
 ))
+x86_data = files(
+    'x86-data/x86_adx.csv',
+    'x86-data/x86_aes.csv',
+    'x86-data/x86_aeskle.csv',
+    'x86-data/x86_aesklewide.csv',
+    'x86-data/x86_avx2.csv',
+    'x86-data/x86_avx5124fmaps.csv',
+    'x86-data/x86_avx5124vnniw.csv',
+    'x86-data/x86_avx512b.csv',
+    'x86-data/x86_avx512bf16.csv',
+    'x86-data/x86_avx512bitalg.csv',
+    'x86-data/x86_avx512bw.csv',
+    'x86-data/x86_avx512cd.csv',
+    'x86-data/x86_avx512d.csv',
+    'x86-data/x86_avx512dq.csv',
+    'x86-data/x86_avx512er.csv',
+    'x86-data/x86_avx512f.csv',
+    'x86-data/x86_avx512fp16.csv',
+    'x86-data/x86_avx512ifma.csv',
+    'x86-data/x86_avx512pf.csv',
+    'x86-data/x86_avx512vbmi2.csv',
+    'x86-data/x86_avx512vbmi.csv',
+    'x86-data/x86_avx512vl.csv',
+    'x86-data/x86_avx512vnni.csv',
+    'x86-data/x86_avx512vp2intersect.csv',
+    'x86-data/x86_avx512vpopcntdq.csv',
+    'x86-data/x86_avx.csv',
+    'x86-data/x86_avxneconvert.csv',
+    'x86-data/x86_avxvnni.csv',
+    'x86-data/x86_avxvnniint8.csv',
+    'x86-data/x86_base.csv',
+    'x86-data/x86_bmi1.csv',
+    'x86-data/x86_bmi2.csv',
+    'x86-data/x86_cet.csv',
+    'x86-data/x86_cldemote.csv',
+    'x86-data/x86_clwb.csv',
+    'x86-data/x86_enqcmd.csv',
+    'x86-data/x86_f16c.csv',
+    'x86-data/x86_fma.csv',
+    'x86-data/x86_fsgsbase.csv',
+    'x86-data/x86_gfni.csv',
+    'x86-data/x86_hreset.csv',
+    'x86-data/x86_invpcid.csv',
+    'x86-data/x86_lzcnt.csv',
+    'x86-data/x86_mmx.csv',
+    'x86-data/x86_movdir64b.csv',
+    'x86-data/x86_movdiri.csv',
+    'x86-data/x86_mpx.csv',
+    'x86-data/x86_msrlist.csv',
+    'x86-data/x86_ospke.csv',
+    'x86-data/x86_pclmulqdq.csv',
+    'x86-data/x86_pconfig.csv',
+    'x86-data/x86_prefetchw.csv',
+    'x86-data/x86_raoint.csv',
+    'x86-data/x86_rdpid.csv',
+    'x86-data/x86_rdrand.csv',
+    'x86-data/x86_rdseed.csv',
+    'x86-data/x86_rtm.csv',
+    'x86-data/x86_serialize.csv',
+    'x86-data/x86_sha.csv',
+    'x86-data/x86_smap.csv',
+    'x86-data/x86_sse2.csv',
+    'x86-data/x86_sse3.csv',
+    'x86-data/x86_sse4_1.csv',
+    'x86-data/x86_sse4_2.csv',
+    'x86-data/x86_sse4_3.csv',
+    'x86-data/x86_sse.csv',
+    'x86-data/x86_ssse3.csv',
+    'x86-data/x86_uintr.csv',
+    'x86-data/x86_vaes.csv',
+    'x86-data/x86_vmx.csv',
+    'x86-data/x86_waitpkg.csv',
+    'x86-data/x86_wbnoinvd.csv',
+    'x86-data/x86_x87.csv',
+    'x86-data/x86_xsaveopt.csv'
+)
+# Generator script that reads the x86-data CSV files listed in x86_data.
+x86_tablegen_py = find_program('../scripts/x86-tablegen.py')
+# x86-enums.inc: produced by running the script with --print-opcode-enums;
+# regenerated whenever any of the CSV inputs changes (depend_files).
+x86_gen_enums_inc = custom_target(
+    'x86-enums.inc',
+    output: 'x86-enums.inc',
+    depend_files: x86_data,
+    command: [x86_tablegen_py, '--print-opcode-enums',
+              '--output-file', '@OUTPUT@', x86_data]
+)
+# x86-tables.inc: produced by running the script with --print-opcode-tables;
+# this is the file disas/x86-core.c includes.
+x86_gen_tables_inc = custom_target(
+    'x86-tables.inc',
+    output: 'x86-tables.inc',
+    depend_files: x86_data,
+    command: [x86_tablegen_py, '--print-opcode-tables',
+              '--output-file', '@OUTPUT@', x86_data]
+)
+# Generated includes and the two C sources are only built when
+# CONFIG_I386_DIS is enabled.
+common_ss.add(when: 'CONFIG_I386_DIS', if_true: x86_gen_enums_inc)
+common_ss.add(when: 'CONFIG_I386_DIS', if_true: x86_gen_tables_inc)
+common_ss.add(when: 'CONFIG_I386_DIS', if_true: files(
+    'x86-core.c',
+    'x86-disas.c'
+))
 common_ss.add(when: 'CONFIG_SH4_DIS', if_true: files('sh4.c'))
 common_ss.add(when: 'CONFIG_SPARC_DIS', if_true: files('sparc.c'))
 common_ss.add(when: 'CONFIG_XTENSA_DIS', if_true: files('xtensa.c'))
diff --git a/disas/x86-core.c b/disas/x86-core.c
new file mode 100644
index 000000000000..c4f7034e3420
--- /dev/null
+++ b/disas/x86-core.c
@@ -0,0 +1,2716 @@
+/*
+ * Copyright (c) 2024-2025 Michael Clark
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <limits.h>
+
+#include "qemu/osdep.h"
+#include "qemu/host-utils.h"
+
+#include "disas/x86.h"
+#include "disas/x86-tables.inc"
+
+/*
+ * Number of elements in a true array (never a pointer).  The argument is
+ * parenthesized before indexing so that expression arguments expand with
+ * the intended precedence.
+ */
+#define array_size(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+typedef struct x86_table_col x86_table_col;
+typedef struct x86_map_str x86_map_str;
+typedef struct x86_operands x86_operands;
+typedef struct x86_arg x86_arg;
+typedef struct x86_opc_prefix x86_opc_prefix;
+typedef struct x86_opr_formatter x86_opr_formatter;
+typedef struct x86_opr_formats x86_opr_formats;
+
+typedef size_t (*x86_opr_str_fn)(char *buf, size_t buflen, x86_codec *c,
+    x86_arg a);
+typedef size_t (*x86_opr_addr_fn)(char *buf, size_t buflen, x86_codec *c,
+    x86_arg a, size_t pc_offset, x86_fmt_symbol sym_cb);
+
+/*
+ * Anonymous enumeration of x86_state_* identifiers, numbered consecutively
+ * starting at x86_state_top (0) and ending with x86_state_done.
+ * NOTE(review): presumably the states of the prefix/opcode walk; the code
+ * that consumes them is outside this chunk - confirm.
+ */
+enum
+{
+    x86_state_top,
+    x86_state_segment,
+    x86_state_legacy,
+    x86_state_map_0f,
+    x86_state_lex_opcode,
+    x86_state_rex_opcode,
+    x86_state_vex_opcode,
+    x86_state_done,
+};
+
+/*
+ * Pairs a width with a character buffer.
+ * NOTE(review): users of this struct are outside this chunk.
+ */
+struct x86_table_col
+{
+    int width;
+    char *data;
+};
+
+/*
+ * One entry of a value-to-name table: 'ord' is the bit pattern to match and
+ * 'str' its printable name.  x86_name_map() walks arrays of these entries
+ * and stops at the first entry whose str is NULL.
+ */
+struct x86_map_str
+{
+    uint ord;
+    const char *str;
+};
+
+/*
+ * Set of operand formatting callbacks.  All members are x86_opr_str_fn
+ * hooks except fmt_rel, which is an x86_opr_addr_fn and therefore also
+ * receives a pc offset and a symbol callback.
+ */
+struct x86_opr_formatter
+{
+    x86_opr_str_fn fmt_const;
+    x86_opr_str_fn fmt_imm;
+    x86_opr_str_fn fmt_reg;
+    x86_opr_str_fn fmt_mrm;
+    x86_opr_str_fn fmt_vec;
+    x86_opr_str_fn fmt_opb;
+    x86_opr_str_fn fmt_is4;
+    x86_opr_str_fn fmt_ime;
+    x86_opr_addr_fn fmt_rel;
+};
+
+/*
+ * Collection of string pointers, one per operand shape.
+ * NOTE(review): presumably printf-style format strings used when printing
+ * memory, immediate and register operands; the initializers and users are
+ * outside this chunk - confirm.
+ */
+struct x86_opr_formats
+{
+    const char *ptr_rip;
+    const char *ptr_rip_disp;
+    const char *ptr_reg;
+    const char *ptr_reg_disp;
+    const char *ptr_reg_sreg;
+    const char *ptr_reg_sreg_disp;
+    const char *ptr_reg_reg;
+    const char *ptr_reg_reg_disp;
+    const char *ptr_sreg;
+    const char *ptr_disp;
+    const char *ptr_imm64;
+    const char *ptr_imm32;
+    const char *imm64;
+    const char *imm32;
+    const char *reg;
+};
+
+/*
+ * Bit-field record of decoded operand fields, 38 bits in total, declared
+ * on ullong storage.  It is embedded in x86_arg as member 'q'.
+ * NOTE(review): the field names suggest ModRM/SIB/VEX components (mod, rm,
+ * r, x, b, s, v, k, l, w, ...); their exact meaning is defined by the
+ * decoder, which is outside this chunk - confirm.
+ */
+struct x86_operands
+{
+    ullong mod : 3;
+    ullong rm  : 3;
+    ullong r   : 5;
+    ullong x   : 5;
+    ullong b   : 5;
+    ullong s   : 2;
+    ullong v   : 5;
+    ullong k   : 5;
+    ullong l   : 2;
+    ullong w   : 1;
+    ullong brd : 1;
+    ullong osz : 1;
+};
+
+/*
+ * Argument passed by value to the operand formatting callbacks
+ * (x86_opr_str_fn and x86_opr_addr_fn): three uint descriptors plus the
+ * x86_operands bit-field record.
+ */
+struct x86_arg
+{
+    uint enc;
+    uint opr;
+    uint ord;
+    x86_operands q;
+};
+
+/*
+ * Six uint fields describing prefix and modrm properties of an opcode.
+ * NOTE(review): producers and consumers are outside this chunk; field
+ * semantics are not established here - confirm before relying on them.
+ */
+struct x86_opc_prefix
+{
+    uint pfx;
+    uint pfx_w;
+    uint pfx_o;
+    uint modfun;
+    uint modreg;
+    uint modmem;
+};
+
+x86_opr_formatter x86_format_intel_hex;
+x86_opr_formatter x86_format_intel_dec;
+x86_opr_formats x86_opr_formats_intel_hex;
+x86_opr_formats x86_opr_formats_intel_dec;
+
+/* File-local debug level, assigned by x86_set_debug(). */
+static uint debug;
+
+/* Store the caller-supplied level d in the file-local debug variable. */
+void x86_set_debug(uint d) { debug = d; }
+
+/*
+ * Names for the x86_modes_* values, used by x86_mode_name().  The table
+ * must end with a { 0, NULL } sentinel: x86_name_map() keeps advancing
+ * until it finds an entry whose str is NULL and would otherwise read past
+ * the end of the array.
+ */
+x86_map_str x86_mode_names[] =
+{
+    { x86_modes_64,             "64"               },
+    { x86_modes_32,             "32"               },
+    { x86_modes_16,             "16"               },
+    { 0,                        NULL               },
+};
+
+/*
+ * Names for the x86_map_* values, used by x86_map_name().  The table must
+ * end with a { 0, NULL } sentinel: x86_name_map() keeps advancing until it
+ * finds an entry whose str is NULL and would otherwise read past the end
+ * of the array.
+ */
+x86_map_str x86_map_names[] =
+{
+    { x86_map_map6,            "map6"              },
+    { x86_map_map5,            "map5"              },
+    { x86_map_map4,            "map4"              },
+    { x86_map_0f3a,            "0f3a"              },
+    { x86_map_0f38,            "0f38"              },
+    { x86_map_0f,              "0f"                },
+    { 0,                       NULL                },
+};
+
+/*
+ * Name table for x86_ord_* values, consumed by x86_ord_name() through
+ * x86_name_map().  x86_name_map() tries entries in array order and emits
+ * every entry whose bits are all present in the remaining value, so the
+ * order of the entries matters.  The { 0, NULL } entry ends the scan.
+ */
+x86_map_str x86_ord_names[] =
+{
+    { x86_ord_rflags,           "rflags"           },
+    { x86_ord_mxcsr,            "mxcsr"            },
+    { x86_ord_xmm0_7,           "xmm0_7"           },
+    { x86_ord_xmm0,             "xmm0"             },
+    { x86_ord_seg,              "seg"              },
+    { x86_ord_stx,              "stx"              },
+    { x86_ord_st0,              "st0"              },
+    { x86_ord_rdi,              "rdi"              },
+    { x86_ord_rsi,              "rsi"              },
+    { x86_ord_rbp,              "rbp"              },
+    { x86_ord_rsp,              "rsp"              },
+    { x86_ord_rbx,              "rbx"              },
+    { x86_ord_rdx,              "rdx"              },
+    { x86_ord_rcx,              "rcx"              },
+    { x86_ord_rax,              "rax"              },
+    { x86_ord_one,              "one"              },
+    { x86_ord_ime,              "ime"              },
+    { x86_ord_is4,              "is4"              },
+    { x86_ord_sib,              "sib"              },
+    { x86_ord_vec,              "vec"              },
+    { x86_ord_opr,              "opr"              },
+    { x86_ord_mrm,              "mrm"              },
+    { x86_ord_reg,              "reg"              },
+    { x86_ord_imm,              "imm"              },
+    { x86_ord_rwi,              "rwi"              },
+    { x86_ord_wi,               "wi"               },
+    { x86_ord_ri,               "ri"               },
+    { x86_ord_rw,               "rw"               },
+    { x86_ord_i,                "i"                },
+    { x86_ord_r,                "r"                },
+    { x86_ord_w,                "w"                },
+    { 0,                        NULL               },
+};
+
+/*
+ * Name table for x86_opr_* values, consumed by x86_opr_name() through
+ * x86_name_map() with an empty separator.  x86_name_map() tries entries in
+ * array order and emits every entry whose bits are all present in the
+ * remaining value, so the order of the entries matters.  The { 0, NULL }
+ * entry ends the scan.
+ */
+x86_map_str x86_opr_names[] =
+{
+    { x86_opr_bnd_mem,          "bnd/mem"          },
+    { x86_opr_k_m64,            "k/m64"            },
+    { x86_opr_k_m32,            "k/m32"            },
+    { x86_opr_k_m16,            "k/m16"            },
+    { x86_opr_k_m8,             "k/m8"             },
+    { x86_opr_vm64z,            "vm64z"            },
+    { x86_opr_vm64y,            "vm64y"            },
+    { x86_opr_vm64x,            "vm64x"            },
+    { x86_opr_vm32z,            "vm32z"            },
+    { x86_opr_vm32y,            "vm32y"            },
+    { x86_opr_vm32x,            "vm32x"            },
+    { x86_opr_rw_mw,            "rw/mw"            },
+    { x86_opr_r64_m64,          "r64/m64"          },
+    { x86_opr_r64_m32,          "r64/m32"          },
+    { x86_opr_r64_m16,          "r64/m16"          },
+    { x86_opr_r64_m8,           "r64/m8"           },
+    { x86_opr_r32_m32,          "r32/m32"          },
+    { x86_opr_r32_m16,          "r32/m16"          },
+    { x86_opr_r32_m8,           "r32/m8"           },
+    { x86_opr_r16_m16,          "r16/m16"          },
+    { x86_opr_r16_m8,           "r16/m8"           },
+    { x86_opr_r8_m8,            "r8/m8"            },
+    { x86_opr_zmm_m512_m64bcst, "zmm/m512/m64bcst" },
+    { x86_opr_zmm_m512_m32bcst, "zmm/m512/m32bcst" },
+    { x86_opr_zmm_m512_m16bcst, "zmm/m512/m16bcst" },
+    { x86_opr_ymm_m256_m64bcst, "ymm/m256/m64bcst" },
+    { x86_opr_ymm_m256_m32bcst, "ymm/m256/m32bcst" },
+    { x86_opr_ymm_m256_m16bcst, "ymm/m256/m16bcst" },
+    { x86_opr_xmm_m128_m64bcst, "xmm/m128/m64bcst" },
+    { x86_opr_xmm_m128_m32bcst, "xmm/m128/m32bcst" },
+    { x86_opr_xmm_m128_m16bcst, "xmm/m128/m16bcst" },
+    { x86_opr_xmm_m64_m32bcst,  "xmm/m64/m32bcst"  },
+    { x86_opr_xmm_m64_m16bcst,  "xmm/m64/m16bcst"  },
+    { x86_opr_xmm_m32_m16bcst,  "xmm/m32/m16bcst"  },
+    { x86_opr_zmm_m512,         "zmm/m512"         },
+    { x86_opr_ymm_m256,         "ymm/m256"         },
+    { x86_opr_xmm_m128,         "xmm/m128"         },
+    { x86_opr_xmm_m64,          "xmm/m64"          },
+    { x86_opr_xmm_m32,          "xmm/m32"          },
+    { x86_opr_xmm_m16,          "xmm/m16"          },
+    { x86_opr_xmm_m8,           "xmm/m8"           },
+    { x86_opr_mm_m64,           "mm/m64"           },
+    { x86_opr_mm_m32,           "mm/m32"           },
+    { x86_opr_mp,               "mp"               },
+    { x86_opr_mw,               "mw"               },
+    { x86_opr_vm64,             "vm64"             },
+    { x86_opr_vm32,             "vm32"             },
+    { x86_opr_r_m64,            "r/m64"            },
+    { x86_opr_r_m32,            "r/m32"            },
+    { x86_opr_r_m16,            "r/m16"            },
+    { x86_opr_r_m8,             "r/m8"             },
+    { x86_opr_m64bcst,          "m64bcst"          },
+    { x86_opr_m32bcst,          "m32bcst"          },
+    { x86_opr_m16bcst,          "m16bcst"          },
+    { x86_opr_mib,              "mib"              },
+    { x86_opr_m384,             "m384"             },
+    { x86_opr_m80,              "m80"              },
+    { x86_opr_m512,             "m512"             },
+    { x86_opr_m256,             "m256"             },
+    { x86_opr_m128,             "m128"             },
+    { x86_opr_m64,              "m64"              },
+    { x86_opr_m32,              "m32"              },
+    { x86_opr_m16,              "m16"              },
+    { x86_opr_m8,               "m8"               },
+    { x86_opr_seg_gs,           "gs"               },
+    { x86_opr_seg_fs,           "fs"               },
+    { x86_opr_seg_ds,           "ds"               },
+    { x86_opr_seg_ss,           "ss"               },
+    { x86_opr_seg_cs,           "cs"               },
+    { x86_opr_seg_es,           "es"               },
+    { x86_opr_reg_xmm0_7,       "xmm0_7"           },
+    { x86_opr_reg_xmm0,         "xmm0"             },
+    { x86_opr_reg_pdi,          "pdi"              },
+    { x86_opr_reg_psi,          "psi"              },
+    { x86_opr_reg_pb,           "pb"               },
+    { x86_opr_reg_pd,           "pd"               },
+    { x86_opr_reg_pc,           "pc"               },
+    { x86_opr_reg_pa,           "pa"               },
+    { x86_opr_reg_bw,           "bw"               },
+    { x86_opr_reg_dw,           "dw"               },
+    { x86_opr_reg_cw,           "cw"               },
+    { x86_opr_reg_aw,           "aw"               },
+    { x86_opr_reg_rbx,          "rbx"              },
+    { x86_opr_reg_rdx,          "rdx"              },
+    { x86_opr_reg_rcx,          "rcx"              },
+    { x86_opr_reg_rax,          "rax"              },
+    { x86_opr_reg_ebx,          "ebx"              },
+    { x86_opr_reg_edx,          "edx"              },
+    { x86_opr_reg_ecx,          "ecx"              },
+    { x86_opr_reg_eax,          "eax"              },
+    { x86_opr_reg_bx,           "bx"               },
+    { x86_opr_reg_dx,           "dx"               },
+    { x86_opr_reg_cx,           "cx"               },
+    { x86_opr_reg_ax,           "ax"               },
+    { x86_opr_reg_cl,           "cl"               },
+    { x86_opr_reg_al,           "al"               },
+    { x86_opr_reg_st0,          "st0"              },
+    { x86_opr_reg_v0,           "v0"               },
+    { x86_opr_reg_ah,           "ah"               },
+    { x86_opr_reg_di,           "di"               },
+    { x86_opr_reg_si,           "si"               },
+    { x86_opr_reg_d,            "d"                },
+    { x86_opr_reg_c,            "c"                },
+    { x86_opr_reg_a,            "a"                },
+    { x86_opr_memfar16_64,      "memfar16:64"      },
+    { x86_opr_memfar16_32,      "memfar16:32"      },
+    { x86_opr_memfar16_16,      "memfar16:16"      },
+    { x86_opr_far16_32,         "far16:32"         },
+    { x86_opr_far16_16,         "far16:16"         },
+    { x86_opr_relw,             "relw"             },
+    { x86_opr_rel8,             "rel8"             },
+    { x86_opr_moffs,            "moffs"            },
+    { x86_opr_1,                "1"                },
+    { x86_opr_zmm,              "zmm"              },
+    { x86_opr_ymm,              "ymm"              },
+    { x86_opr_xmm,              "xmm"              },
+    { x86_opr_mm,               "mm"               },
+    { x86_opr_ra,               "ra"               },
+    { x86_opr_rw,               "rw"               },
+    { x86_opr_r64,              "r64"              },
+    { x86_opr_r32,              "r32"              },
+    { x86_opr_r16,              "r16"              },
+    { x86_opr_r8,               "r8"               },
+    { x86_opr_iw,               "iw"               },
+    { x86_opr_iwd,              "iwd"              },
+    { x86_opr_i64,              "i64"              },
+    { x86_opr_i32,              "i32"              },
+    { x86_opr_i16,              "i16"              },
+    { x86_opr_ib,               "ib"               },
+    { x86_opr_bnd,              "bnd"              },
+    { x86_opr_dreg,             "dreg"             },
+    { x86_opr_creg,             "creg"             },
+    { x86_opr_seg,              "seg"              },
+    { x86_opr_k,                "k"                },
+    { x86_opr_st,               "st"               },
+    { x86_opr_mmx,              "mmx"              },
+    { x86_opr_vec,              "vec"              },
+    { x86_opr_reg,              "reg"              },
+    { x86_opr_imm,              "imm"              },
+    { x86_opr_bcst,             "bcst"             },
+    { x86_opr_mem,              "mem"              },
+    { x86_opr_flag_er,          "{er}"             },
+    { x86_opr_flag_k,           "{k}"              },
+    { x86_opr_flag_sae,         "{sae}"            },
+    { x86_opr_flag_z,           "{z}"              },
+    { x86_opr_flag_rs2,         "{rs2}"            },
+    { x86_opr_flag_rs4,         "{rs4}"            },
+    { x86_opr_f64x8,            "/f64x8"           },
+    { x86_opr_f64x4,            "/f64x4"           },
+    { x86_opr_f64x2,            "/f64x2"           },
+    { x86_opr_f64x1,            "/f64x1"           },
+    { x86_opr_f32x16,           "/f32x16"          },
+    { x86_opr_f32x8,            "/f32x8"           },
+    { x86_opr_f32x4,            "/f32x4"           },
+    { x86_opr_f32x2,            "/f32x2"           },
+    { x86_opr_f32x1,            "/f32x1"           },
+    { x86_opr_f16x32,           "/f16x32"          },
+    { x86_opr_f16x16,           "/f16x16"          },
+    { x86_opr_f16x8,            "/f16x8"           },
+    { x86_opr_f16x4,            "/f16x4"           },
+    { x86_opr_f16x2,            "/f16x2"           },
+    { x86_opr_f16x1,            "/f16x1"           },
+    { x86_opr_f8x64,            "/f8x64"           },
+    { x86_opr_f8x32,            "/f8x32"           },
+    { x86_opr_f8x16,            "/f8x16"           },
+    { x86_opr_f8x8,             "/f8x8"            },
+    { x86_opr_f8x4,             "/f8x4"            },
+    { x86_opr_f8x2,             "/f8x2"            },
+    { x86_opr_f8x1,             "/f8x1"            },
+    { x86_opr_i512x1,           "/i512x1"          },
+    { x86_opr_i256x2,           "/i256x2"          },
+    { x86_opr_i256x1,           "/i256x1"          },
+    { x86_opr_i128x4,           "/i128x4"          },
+    { x86_opr_i128x2,           "/i128x2"          },
+    { x86_opr_i128x1,           "/i128x1"          },
+    { x86_opr_i64x8,            "/i64x8"           },
+    { x86_opr_i64x4,            "/i64x4"           },
+    { x86_opr_i64x2,            "/i64x2"           },
+    { x86_opr_i64x1,            "/i64x1"           },
+    { x86_opr_i32x16,           "/i32x16"          },
+    { x86_opr_i32x8,            "/i32x8"           },
+    { x86_opr_i32x4,            "/i32x4"           },
+    { x86_opr_i32x2,            "/i32x2"           },
+    { x86_opr_i32x1,            "/i32x1"           },
+    { x86_opr_i16x32,           "/i16x32"          },
+    { x86_opr_i16x16,           "/i16x16"          },
+    { x86_opr_i16x8,            "/i16x8"           },
+    { x86_opr_i16x4,            "/i16x4"           },
+    { x86_opr_i16x2,            "/i16x2"           },
+    { x86_opr_i16x1,            "/i16x1"           },
+    { x86_opr_i8x64,            "/i8x64"           },
+    { x86_opr_i8x32,            "/i8x32"           },
+    { x86_opr_i8x16,            "/i8x16"           },
+    { x86_opr_i8x8,             "/i8x8"            },
+    { x86_opr_i8x4,             "/i8x4"            },
+    { x86_opr_i8x2,             "/i8x2"            },
+    { x86_opr_i8x1,             "/i8x1"            },
+    { 0,                        NULL               },
+};
+
+/*
+ * Name table for x86_enc_* values, consumed by x86_enc_name() through
+ * x86_name_map() with an empty separator; the strings carry their own
+ * leading '.' or ' ' where one is wanted.  x86_name_map() tries entries in
+ * array order, so the order of the entries matters.  Several entries map
+ * to an empty string and therefore print nothing.  The { 0, NULL } entry
+ * ends the scan.
+ */
+x86_map_str x86_enc_names[] =
+{
+    { x86_enc_r_norexb,         " .norexb"         },
+    { x86_enc_r_lock,           " .lock"           },
+    { x86_enc_r_rep,            " .rep"            },
+    { x86_enc_s_a64,            " .a64"            },
+    { x86_enc_s_a32,            " .a32"            },
+    { x86_enc_s_a16,            " .a16"            },
+    { x86_enc_s_o64,            " .o64"            },
+    { x86_enc_s_o32,            " .o32"            },
+    { x86_enc_s_o16,            " .o16"            },
+    { x86_enc_j_i16,            " i16"             },
+    { x86_enc_j_ib,             " ib"              },
+    { x86_enc_i_i64,            " i64"             },
+    { x86_enc_i_i32,            " i32"             },
+    { x86_enc_i_i16,            " i16"             },
+    { x86_enc_i_iwd,            " iwd"             },
+    { x86_enc_i_iw,             " iw"              },
+    { x86_enc_i_ib,             " ib"              },
+    { x86_enc_f_opcode_r,       ""                 },
+    { x86_enc_f_opcode,         ""                 },
+    { x86_enc_f_modrm_n,        ""                 },
+    { x86_enc_f_modrm_r,        ""                 },
+    { x86_enc_o_opcode_r,       ""                 },
+    { x86_enc_t_evex,           ".evex"            },
+    { x86_enc_t_vex,            ".vex"             },
+    { x86_enc_t_lex,            ".lex"             },
+    { x86_enc_l_lig,            ".lig"             },
+    { x86_enc_l_512,            ".512"             },
+    { x86_enc_l_256,            ".256"             },
+    { x86_enc_l_128,            ".128"             },
+    { x86_enc_l_l1,             ".l1"              },
+    { x86_enc_l_l0,             ".l0"              },
+    { x86_enc_l_lz,             ".lz"              },
+    { x86_enc_p_rexw,           ".w"               },
+    { x86_enc_p_9b,             ".9b"              },
+    { x86_enc_p_f2,             ".f2"              },
+    { x86_enc_p_f3,             ".f3"              },
+    { x86_enc_p_66,             ".66"              },
+    { x86_enc_m_map6,           ".map6"            },
+    { x86_enc_m_map5,           ".map5"            },
+    { x86_enc_m_map4,           ".map4"            },
+    { x86_enc_m_0f3a,           ".0f3a"            },
+    { x86_enc_m_0f38,           ".0f38"            },
+    { x86_enc_m_0f,             ".0f"              },
+    { x86_enc_w_wig,            ".wig"             },
+    { x86_enc_w_ww,             ".ww"              },
+    { x86_enc_w_wx,             ".wx"              },
+    { x86_enc_w_wn,             ".wn"              },
+    { x86_enc_w_wb,             ".wb"              },
+    { x86_enc_w_w1,             ".w1"              },
+    { x86_enc_w_w0,             ".w0"              },
+    { 0,                        NULL               },
+};
+
+/*
+ *  string tables
+ */
+
+/*
+ * Render the names of all table entries matched by 'ord' into buf.
+ *
+ *   p    table of entries, terminated by an entry whose str is NULL
+ *   buf  destination buffer, or NULL to only measure the result
+ *   len  size of buf in bytes
+ *   ord  value to decode; the bits of each matched entry are cleared
+ *        from it as the entry is consumed
+ *   sep  separator emitted between names (never before the first one)
+ *
+ * Entries are tried in table order; an entry matches when its ord is
+ * non-zero and all of its bits are set in the remaining value.
+ *
+ * Returns the length the complete string requires, excluding the
+ * terminating NUL, in the manner of snprintf(): a result >= len means the
+ * text in buf was truncated.  buf is always NUL terminated when len > 0,
+ * even if no entry matches.
+ */
+static size_t x86_name_map(x86_map_str *p, char * buf, size_t len, uint ord,
+    const char *sep)
+{
+    size_t count = 0;
+
+    if (buf != NULL && len > 0) {
+        buf[0] = '\0';
+    }
+    for (;;) {
+        /* advance to the next entry fully contained in the remaining bits */
+        while (p->str != NULL) {
+            if (p->ord && (p->ord & ord) == p->ord) {
+                ord = ord & ~p->ord;
+                break;
+            }
+            p++;
+        }
+        if (p->str == NULL) {
+            break;
+        }
+        /*
+         * Once the buffer is exhausted keep measuring with a NULL/0 call
+         * so that the returned length still covers every matched name.
+         */
+        int room = buf != NULL && count < len;
+        int ret = snprintf(room ? buf + count : NULL,
+            room ? len - count : 0,
+            "%s%s", count == 0 ? "" : sep, p->str);
+        if (ret > 0) {
+            count += ret;
+        }
+    }
+    return count;
+}
+
+/*
+ * Format the names matched by 'mode' in the x86_mode_names table into buf
+ * (at most len bytes), joined by sep.  Returns what x86_name_map() returns.
+ */
+size_t x86_mode_name(char *buf, size_t len, uint mode, const char *sep)
+{
+    size_t n = x86_name_map(x86_mode_names, buf, len, mode, sep);
+
+    return n;
+}
+
+/*
+ * Format the names matched by 'mode' in the x86_map_names table into buf
+ * (at most len bytes), joined by sep.  Returns what x86_name_map() returns.
+ */
+size_t x86_map_name(char *buf, size_t len, uint mode, const char *sep)
+{
+    size_t n = x86_name_map(x86_map_names, buf, len, mode, sep);
+
+    return n;
+}
+
+/*
+ * Format the names matched by 'ord' in the x86_ord_names table into buf
+ * (at most len bytes), joined by sep.  Returns what x86_name_map() returns.
+ */
+size_t x86_ord_name(char *buf, size_t len, uint ord, const char *sep)
+{
+    size_t n = x86_name_map(x86_ord_names, buf, len, ord, sep);
+
+    return n;
+}
+
+/*
+ * Format the names matched by 'opr' in the x86_opr_names table into buf
+ * (at most len bytes) with no separator between them.  Returns what
+ * x86_name_map() returns.
+ */
+size_t x86_opr_name(char *buf, size_t len, uint opr)
+{
+    const char *no_sep = "";
+
+    return x86_name_map(x86_opr_names, buf, len, opr, no_sep);
+}
+
+/*
+ * Format the names matched by 'enc' in the x86_enc_names table into buf
+ * (at most len bytes) with no separator between them.  Returns what
+ * x86_name_map() returns.
+ */
+size_t x86_enc_name(char *buf, size_t len, uint enc)
+{
+    const char *no_sep = "";
+
+    return x86_name_map(x86_enc_names, buf, len, enc, no_sep);
+}
+
+/*
+ * Look up the name of register number 'reg' in x86_reg_names.  Numbers of
+ * 512 and above are not looked up and yield the string "invalid".
+ */
+const char *x86_reg_name(uint reg)
+{
+    if (reg >= 512) {
+        return "invalid";
+    }
+    return x86_reg_names[reg];
+}
+
+/*
+ * Write a one-character-per-operand summary of the operand order array
+ * 'ord' into buf.  Each non-zero element is translated through
+ * x86_ord_type_val() into an index into the code string " -irmvo ";
+ * the walk stops at the first zero element or after as many elements as
+ * x86_ord_table[0].ord holds.
+ *
+ *   buf  destination buffer; NULL is accepted and yields 0
+ *   len  size of buf in bytes, including room for the terminating NUL
+ *   ord  operand order array
+ *
+ * Returns the number of characters stored, excluding the NUL.  At most
+ * len - 1 characters are stored so the terminator always fits.
+ */
+size_t x86_ord_mnem(char *buf, size_t len, const ushort *ord)
+{
+    const char codes[8] = " -irmvo ";
+    size_t count = 0;
+
+    /* nothing can be stored, not even the terminator */
+    if (buf == NULL || len == 0) {
+        return 0;
+    }
+    for (size_t i = 0; i < array_size(x86_ord_table[0].ord) && ord[i]; i++) {
+        uint type = x86_ord_type_val(ord[i]);
+        /* keep one byte in reserve for the terminating NUL */
+        if (count + 1 < len) {
+            /* guard the lookup in case the type value exceeds the table */
+            buf[count++] = type < array_size(codes) ? codes[type] : '?';
+        }
+    }
+    buf[count] = '\0';
+    return count;
+}
+
+/*
+ * Return the printable name of an x86_table_* type value, or the empty
+ * string for a value that is not one of the four known types.
+ */
+const char *x86_table_type_name(uint type)
+{
+    if (type == x86_table_none) {
+        return "none";
+    }
+    if (type == x86_table_lex) {
+        return "lex";
+    }
+    if (type == x86_table_vex) {
+        return "vex";
+    }
+    if (type == x86_table_evex) {
+        return "evex";
+    }
+    return "";
+}
+
+/*
+ * Return the printable name of an x86_map_* opcode map value.  Both
+ * x86_map_none and unknown values yield the empty string.
+ */
+const char *x86_table_map_name(uint map)
+{
+    if (map == x86_map_0f) {
+        return "0f";
+    }
+    if (map == x86_map_0f38) {
+        return "0f38";
+    }
+    if (map == x86_map_0f3a) {
+        return "0f3a";
+    }
+    if (map == x86_map_map4) {
+        return "map4";
+    }
+    if (map == x86_map_map5) {
+        return "map5";
+    }
+    if (map == x86_map_map6) {
+        return "map6";
+    }
+    /* x86_map_none and anything unrecognised */
+    return "";
+}
+
+/*
+ * Return the printable name of a prefix value: one of x86_pfx_66,
+ * x86_pfx_f3, x86_pfx_f2 or x86_pfx_9b, optionally combined with
+ * x86_pfx_rexw (shown with a "+w" suffix).  Any other value yields the
+ * empty string.
+ */
+const char *x86_table_prefix_name(uint prefix)
+{
+    static const struct {
+        uint pfx;
+        const char *name;
+    } names[] = {
+        { x86_pfx_66,                "66"   },
+        { x86_pfx_f3,                "f3"   },
+        { x86_pfx_f2,                "f2"   },
+        { x86_pfx_9b,                "9b"   },
+        { x86_pfx_66 | x86_pfx_rexw, "66+w" },
+        { x86_pfx_f3 | x86_pfx_rexw, "f3+w" },
+        { x86_pfx_f2 | x86_pfx_rexw, "f2+w" },
+        { x86_pfx_9b | x86_pfx_rexw, "9b+w" },
+    };
+
+    for (size_t i = 0; i < sizeof(names) / sizeof(names[0]); i++) {
+        if (names[i].pfx == prefix) {
+            return names[i].name;
+        }
+    }
+    return "";
+}
+
+/*
+ *  metadata filters
+ */
+
+/*
+ * Check whether encoding descriptor 'enc' may be used with a REX prefix.
+ *
+ * The descriptor must be of type lex and must not carry the norexb flag.
+ * W is read from bit 3 of prefix.data[0]; the descriptor's w field must
+ * then be w0 (for W=0) or w1 (for W=1), or any of wig, wn, wb, ww, wx.
+ *
+ * Returns 0 when the descriptor is compatible with the prefix, -1 if not.
+ */
+int x86_enc_filter_rex(x86_rex prefix, uint enc)
+{
+    const uint wsel = enc & x86_enc_w_mask;
+    const uint w = (prefix.data[0] >> 3) & 1;
+    uint flexible;
+
+    if ((enc & x86_enc_t_mask) != x86_enc_t_lex) {
+        return -1;
+    }
+    if ((enc & x86_enc_r_norexb) != 0) {
+        return -1;
+    }
+
+    /* w field values accepted regardless of the prefix W bit */
+    flexible = wsel == x86_enc_w_wig || wsel == x86_enc_w_wn ||
+               wsel == x86_enc_w_wb || wsel == x86_enc_w_ww ||
+               wsel == x86_enc_w_wx;
+
+    if (w == x86_vex_w0) {
+        return (wsel == x86_enc_w_w0 || flexible) ? 0 : -1;
+    }
+    if (w == x86_vex_w1) {
+        return (wsel == x86_enc_w_w1 || flexible) ? 0 : -1;
+    }
+    return 0;
+}
+
+/*
+ * Check whether encoding descriptor 'enc' may be used with a REX2 prefix.
+ *
+ * The descriptor must be of type lex and its map field must equal the map
+ * bit taken from bit 7 of prefix.data[0].  W is read from bit 3 of the
+ * same byte; the descriptor's w field must be wig or match W (w0 / w1).
+ *
+ * Returns 0 when the descriptor is compatible with the prefix, -1 if not.
+ */
+int x86_enc_filter_rex2(x86_rex2 prefix, uint enc)
+{
+    const uint wsel = enc & x86_enc_w_mask;
+    const uint map = (enc & x86_enc_m_mask) >> x86_enc_m_shift;
+    const uint m = (prefix.data[0] >> 7) & 1;
+    const uint w = (prefix.data[0] >> 3) & 1;
+    uint want;
+
+    if ((enc & x86_enc_t_mask) != x86_enc_t_lex || m != map) {
+        return -1;
+    }
+
+    if (w == x86_vex_w0) {
+        want = x86_enc_w_w0;
+    } else if (w == x86_vex_w1) {
+        want = x86_enc_w_w1;
+    } else {
+        return -1;
+    }
+
+    return (wsel == want || wsel == x86_enc_w_wig) ? 0 : -1;
+}
+
+/*
+ * Check whether encoding descriptor 'enc' may be used with a two-byte VEX
+ * prefix.
+ *
+ * The descriptor must be of type vex, have a w field of w0 or wig, and
+ * use map 0f.  The prefix selector p is read from bits 0-1 and the length
+ * bit l from bit 2 of prefix.data[0]:
+ *   l = l0 accepts l fields lig, lz, l0, 128
+ *   l = l1 accepts l fields lig, l1, 256
+ *   p must correspond exactly to the p field (none, 66, f2, f3)
+ *
+ * Returns 0 when the descriptor is compatible with the prefix, -1 if not.
+ */
+int x86_enc_filter_vex2(x86_vex2 prefix, uint enc)
+{
+    const uint wsel = enc & x86_enc_w_mask;
+    const uint lsel = enc & x86_enc_l_mask;
+    const uint psel = enc & x86_enc_p_mask;
+    const uint map = (enc & x86_enc_m_mask) >> x86_enc_m_shift;
+    const uint p = (prefix.data[0] >> 0) & 3;
+    const uint l = (prefix.data[0] >> 2) & 1;
+    uint len_ok, want_p;
+
+    if ((enc & x86_enc_t_mask) != x86_enc_t_vex) {
+        return -1;
+    }
+    if (wsel != x86_enc_w_w0 && wsel != x86_enc_w_wig) {
+        return -1;
+    }
+    if (x86_map_0f != map) {
+        return -1;
+    }
+
+    if (l == x86_vex_l0) {
+        len_ok = lsel == x86_enc_l_lig || lsel == x86_enc_l_lz ||
+                 lsel == x86_enc_l_l0 || lsel == x86_enc_l_128;
+    } else if (l == x86_vex_l1) {
+        len_ok = lsel == x86_enc_l_lig || lsel == x86_enc_l_l1 ||
+                 lsel == x86_enc_l_256;
+    } else {
+        return -1;
+    }
+    if (!len_ok) {
+        return -1;
+    }
+
+    if (p == x86_pfx_none) {
+        want_p = x86_enc_p_none;
+    } else if (p == x86_pfx_66) {
+        want_p = x86_enc_p_66;
+    } else if (p == x86_pfx_f2) {
+        want_p = x86_enc_p_f2;
+    } else if (p == x86_pfx_f3) {
+        want_p = x86_enc_p_f3;
+    } else {
+        return -1;
+    }
+
+    return psel == want_p ? 0 : -1;
+}
+
+/*
+ * Test whether opcode encoding 'enc' is compatible with a 3-byte VEX
+ * prefix.
+ *
+ * The encoding must be of VEX type and its map must equal the low five
+ * bits of the first payload byte. The W bit (bit 7), vector length bit
+ * (bit 2) and mandatory prefix field (bits 0-1) of the second payload
+ * byte must each be accepted by the encoding.
+ *
+ * Returns 0 when compatible and -1 otherwise.
+ */
+int x86_enc_filter_vex3(x86_vex3 prefix, uint enc)
+{
+    const uint enc_type = enc & x86_enc_t_mask;
+    const uint enc_w = enc & x86_enc_w_mask;
+    const uint enc_l = enc & x86_enc_l_mask;
+    const uint enc_p = enc & x86_enc_p_mask;
+    const uint enc_map = (enc & x86_enc_m_mask) >> x86_enc_m_shift;
+
+    const uint pfx_map = prefix.data[0] & 31;
+    const uint pfx_w = (prefix.data[1] >> 7) & 1;
+    const uint pfx_p = prefix.data[1] & 3;
+    const uint pfx_l = (prefix.data[1] >> 2) & 1;
+
+    if (enc_type != x86_enc_t_vex) {
+        return -1;
+    }
+    if (pfx_map != enc_map) {
+        return -1;
+    }
+
+    /* operand width */
+    switch (pfx_w) {
+    case x86_vex_w0:
+        if (enc_w != x86_enc_w_w0 && enc_w != x86_enc_w_wig) {
+            return -1;
+        }
+        break;
+    case x86_vex_w1:
+        if (enc_w != x86_enc_w_w1 && enc_w != x86_enc_w_wig) {
+            return -1;
+        }
+        break;
+    default:
+        return -1;
+    }
+
+    /* vector length */
+    switch (pfx_l) {
+    case x86_vex_l0:
+        if (enc_l != x86_enc_l_lig && enc_l != x86_enc_l_lz &&
+            enc_l != x86_enc_l_l0 && enc_l != x86_enc_l_128) {
+            return -1;
+        }
+        break;
+    case x86_vex_l1:
+        if (enc_l != x86_enc_l_lig && enc_l != x86_enc_l_l1 &&
+            enc_l != x86_enc_l_256) {
+            return -1;
+        }
+        break;
+    default:
+        return -1;
+    }
+
+    /* mandatory prefix */
+    switch (pfx_p) {
+    case x86_pfx_none:
+        return enc_p == x86_enc_p_none ? 0 : -1;
+    case x86_pfx_66:
+        return enc_p == x86_enc_p_66 ? 0 : -1;
+    case x86_pfx_f2:
+        return enc_p == x86_enc_p_f2 ? 0 : -1;
+    case x86_pfx_f3:
+        return enc_p == x86_enc_p_f3 ? 0 : -1;
+    default:
+        return -1;
+    }
+}
+
+/*
+ * Test whether opcode encoding 'enc' is compatible with an EVEX prefix.
+ *
+ * The encoding must be of EVEX type and its map must equal the low three
+ * bits of the first payload byte. The W bit (byte 1, bit 7), mandatory
+ * prefix field (byte 1, bits 0-1) and vector length field (byte 2,
+ * bits 5-6) must each be accepted by the encoding.
+ *
+ * Returns 0 when compatible and -1 otherwise.
+ */
+int x86_enc_filter_evex(x86_evex prefix, uint enc)
+{
+    const uint enc_type = enc & x86_enc_t_mask;
+    const uint enc_w = enc & x86_enc_w_mask;
+    const uint enc_l = enc & x86_enc_l_mask;
+    const uint enc_p = enc & x86_enc_p_mask;
+    const uint enc_map = (enc & x86_enc_m_mask) >> x86_enc_m_shift;
+    const uint len_ignored = enc_l == x86_enc_l_lig;
+
+    const uint pfx_map = prefix.data[0] & 7;
+    const uint pfx_w = (prefix.data[1] >> 7) & 1;
+    const uint pfx_p = prefix.data[1] & 3;
+    const uint pfx_l = (prefix.data[2] >> 5) & 3;
+
+    if (enc_type != x86_enc_t_evex) {
+        return -1;
+    }
+    if (pfx_map != enc_map) {
+        return -1;
+    }
+
+    /* operand width */
+    switch (pfx_w) {
+    case x86_vex_w0:
+        if (enc_w != x86_enc_w_w0 && enc_w != x86_enc_w_wig) {
+            return -1;
+        }
+        break;
+    case x86_vex_w1:
+        if (enc_w != x86_enc_w_w1 && enc_w != x86_enc_w_wig) {
+            return -1;
+        }
+        break;
+    default:
+        return -1;
+    }
+
+    /* vector length */
+    switch (pfx_l) {
+    case x86_vex_l0:
+        if (!len_ignored && enc_l != x86_enc_l_128) {
+            return -1;
+        }
+        break;
+    case x86_vex_l1:
+        if (!len_ignored && enc_l != x86_enc_l_256) {
+            return -1;
+        }
+        break;
+    case x86_vex_l2:
+        if (!len_ignored && enc_l != x86_enc_l_512) {
+            return -1;
+        }
+        break;
+    default:
+        return -1;
+    }
+
+    /* mandatory prefix */
+    switch (pfx_p) {
+    case x86_pfx_none:
+        return enc_p == x86_enc_p_none ? 0 : -1;
+    case x86_pfx_66:
+        return enc_p == x86_enc_p_66 ? 0 : -1;
+    case x86_pfx_f2:
+        return enc_p == x86_enc_p_f2 ? 0 : -1;
+    case x86_pfx_f3:
+        return enc_p == x86_enc_p_f3 ? 0 : -1;
+    default:
+        return -1;
+    }
+}
+
+/*
+ *  table sorting
+ */
+
+/*
+ * qsort comparator over size_t indices into x86_opc_table.
+ *
+ * Orders records by (type, prefix, map) encoding bits first, then by the
+ * two opcode bytes, and finally by the remaining encoding bits.
+ */
+static int x86_opc_data_compare_opcode(const void *p1, const void *p2)
+{
+    const uint key_mask = x86_enc_t_mask | x86_enc_p_mask | x86_enc_m_mask;
+    const x86_opc_data *lhs = x86_opc_table + *(size_t *)p1;
+    const x86_opc_data *rhs = x86_opc_table + *(size_t *)p2;
+
+    /* leading part of the encoding: type, prefix and map */
+    uint lhs_pre = lhs->enc & key_mask;
+    uint rhs_pre = rhs->enc & key_mask;
+    if (lhs_pre != rhs_pre) {
+        return lhs_pre < rhs_pre ? -1 : 1;
+    }
+
+    /* opcode bytes */
+    for (size_t i = 0; i < 2; i++) {
+        if (lhs->opc[i] != rhs->opc[i]) {
+            return lhs->opc[i] < rhs->opc[i] ? -1 : 1;
+        }
+    }
+
+    /* everything else in the encoding word */
+    uint lhs_suf = lhs->enc & ~key_mask;
+    uint rhs_suf = rhs->enc & ~key_mask;
+    if (lhs_suf != rhs_suf) {
+        return lhs_suf < rhs_suf ? -1 : 1;
+    }
+    return 0;
+}
+
+/*
+ * qsort comparator over size_t indices into x86_opc_table.
+ *
+ * Orders records by mnemonic name using strcmp; records sharing a name
+ * are ordered by x86_opc_data_compare_opcode.
+ */
+static int x86_opc_data_compare_alpha(const void *p1, const void *p2)
+{
+    const x86_opc_data *lhs = x86_opc_table + *(size_t *)p1;
+    const x86_opc_data *rhs = x86_opc_table + *(size_t *)p2;
+    int order = strcmp(x86_op_names[lhs->op], x86_op_names[rhs->op]);
+
+    if (order != 0) {
+        return order;
+    }
+    return x86_opc_data_compare_opcode(p1, p2);
+}
+
+/*
+ * Allocate an index table of n entries where entry i holds the value i.
+ * The idx array is obtained from g_malloc.
+ */
+static x86_table_idx x86_opc_table_index(size_t n)
+{
+    x86_table_idx tab = { n, g_malloc(sizeof(size_t) * n) };
+    size_t i = 0;
+
+    while (i < tab.count) {
+        tab.idx[i] = i;
+        i++;
+    }
+    return tab;
+}
+
+/*
+ * Build an identity index with one entry per x86_opc_table record
+ * (x86_opc_table_size entries).
+ */
+x86_table_idx x86_opc_table_identity(void)
+{
+    x86_table_idx all = x86_opc_table_index(x86_opc_table_size);
+    return all;
+}
+
+/*
+ * Sort an index table in place according to 'sort' and return it.
+ *
+ * x86_sort_numeric orders by opcode, x86_sort_alpha orders by mnemonic.
+ * x86_sort_none and any other value leave the table untouched.
+ */
+x86_table_idx x86_opc_table_sorted(x86_table_idx tab, uint sort)
+{
+    int (*compare)(const void *, const void *) = NULL;
+
+    if (sort == x86_sort_numeric) {
+        compare = x86_opc_data_compare_opcode;
+    } else if (sort == x86_sort_alpha) {
+        compare = x86_opc_data_compare_alpha;
+    }
+
+    if (compare != NULL) {
+        qsort(tab.idx, tab.count, sizeof(size_t), compare);
+    }
+    return tab;
+}
+
+/*
+ * Return a new index table containing only the entries of 'tab' whose
+ * opcode record has at least one mode bit in common with 'modes'.
+ *
+ * The relative order of the surviving entries is preserved. The input
+ * table's idx array is released with g_free, so ownership of 'tab' passes
+ * to this function and the caller owns the returned table.
+ */
+x86_table_idx x86_opc_table_filter(x86_table_idx tab, uint modes)
+{
+    /* first pass: count survivors so the result is sized exactly */
+    size_t count = 0;
+    for (size_t i = 0; i < tab.count; i++) {
+        const x86_opc_data *d = x86_opc_table + tab.idx[i];
+        if (d->mode & modes) {
+            count++;
+        }
+    }
+
+    x86_table_idx newtab = { count, g_malloc(sizeof(size_t) * count) };
+
+    /*
+     * second pass: copy the opcode table index itself, not the position
+     * within 'tab'. The two only coincide for an identity table, so
+     * storing the position would corrupt a sorted or pre-filtered input.
+     */
+    count = 0;
+    for (size_t i = 0; i < tab.count; i++) {
+        const x86_opc_data *d = x86_opc_table + tab.idx[i];
+        if (d->mode & modes) {
+            newtab.idx[count++] = tab.idx[i];
+        }
+    }
+
+    g_free(tab.idx);
+    return newtab;
+}
+
+/*
+ * Compare two opcode records for lookup purposes.
+ *
+ * Records are ordered by the type, map and prefix+rexw encoding bits and
+ * then by the opcode bytes restricted to the bits that are set in both
+ * records' opcode masks. The remaining encoding bits are not compared.
+ */
+static int x86_opc_data_compare_masked(const void *p1, const void *p2)
+{
+    const uint key_mask =
+        x86_enc_t_mask | x86_enc_m_mask | x86_enc_prexw_mask;
+    x86_opc_data *lhs = (x86_opc_data *)p1;
+    x86_opc_data *rhs = (x86_opc_data *)p2;
+
+    uint lhs_key = lhs->enc & key_mask;
+    uint rhs_key = rhs->enc & key_mask;
+    if (lhs_key != rhs_key) {
+        return lhs_key < rhs_key ? -1 : 1;
+    }
+
+    /* only opcode bits significant to both records take part */
+    ushort common = lhs->opm_s & rhs->opm_s;
+    uint lhs_opc = lhs->opc_s & common;
+    uint rhs_opc = rhs->opc_s & common;
+    if (lhs_opc != rhs_opc) {
+        return lhs_opc < rhs_opc ? -1 : 1;
+    }
+
+    /* suffix is unnecessary for matching opcodes; it contains format */
+    return 0;
+}
+
+/*
+ * qsort comparator used when building the opcode map.
+ *
+ * Orders records by the type, map and prefix+rexw encoding bits, then
+ * for each of the two opcode bytes by ascending opcode value and
+ * descending opcode mask, and finally by the remaining encoding bits.
+ */
+static int x86_opc_data_compare_build(const void *p1, const void *p2)
+{
+    const uint key_mask =
+        x86_enc_t_mask | x86_enc_m_mask | x86_enc_prexw_mask;
+    x86_opc_data *lhs = (x86_opc_data *)p1;
+    x86_opc_data *rhs = (x86_opc_data *)p2;
+
+    uint lhs_key = lhs->enc & key_mask;
+    uint rhs_key = rhs->enc & key_mask;
+    if (lhs_key != rhs_key) {
+        return lhs_key < rhs_key ? -1 : 1;
+    }
+
+    for (size_t i = 0; i < 2; i++) {
+        if (lhs->opc[i] != rhs->opc[i]) {
+            return lhs->opc[i] < rhs->opc[i] ? -1 : 1;
+        }
+        /* most specific mask first for fixed modrm */
+        if (lhs->opm[i] != rhs->opm[i]) {
+            return lhs->opm[i] < rhs->opm[i] ? 1 : -1;
+        }
+    }
+
+    uint lhs_suf = lhs->enc & ~key_mask;
+    uint rhs_suf = rhs->enc & ~key_mask;
+    if (lhs_suf != rhs_suf) {
+        return lhs_suf < rhs_suf ? -1 : 1;
+    }
+    return 0;
+}
+
+/*
+ *  table construction
+ */
+
+/*
+ * Derive the synthesized prefix flags and the ModRM.mod constraints for
+ * one opcode record.
+ *
+ * d is the opcode record, o its operand record and p its operand order
+ * record. The result starts zeroed; pfx, pfx_w and pfx_o are filled in
+ * from the encoding's width field, and modfun, modreg and modmem from the
+ * encoding function and the operand that maps to ModRM.rm.
+ */
+static x86_opc_prefix x86_table_make_prefix(const x86_opc_data *d,
+    const x86_opr_data *o, const x86_ord_data *p)
+{
+    x86_opc_prefix tp;
+    memset(&tp, 0, sizeof(tp));
+
+    /* extract prefix and synthesize width prefixes */
+    switch (x86_enc_type(d->enc)) {
+    case x86_enc_t_lex:
+    case x86_enc_t_vex:
+    case x86_enc_t_evex:
+        switch (d->enc & x86_enc_w_mask) {
+        case x86_enc_w_wig:
+        case x86_enc_w_wn:
+        case x86_enc_w_wb:
+        case x86_enc_w_w0: break;
+        case x86_enc_w_w1: tp.pfx = x86_enc_p_rexw; break;
+        /* .wx requests both a rexw variant and a 66 variant */
+        case x86_enc_w_wx: tp.pfx_w = x86_enc_p_rexw; /* fallthrough */
+        case x86_enc_w_ww: tp.pfx_o = x86_enc_p_66; break;
+        }
+        break;
+     }
+
+    /* find register or memory operand mapping to modrm.rm field
+     * so that we can add mod=0b11 or mod!=0b11 to modrm mask */
+    tp.modfun = x86_enc_func(d->enc) == x86_enc_f_modrm_n;
+    for (size_t i = 0; i < array_size(o->opr) && o->opr[i]; i++) {
+        uint isreg = x86_opr_type_val(o->opr[i]) >= x86_opr_reg;
+        uint ismem = x86_opr_has_mem(o->opr[i]);
+        uint ismrm = x86_ord_type_val(p->ord[i]) == x86_ord_mrm;
+        if (ismrm) {
+            /* an operand that can be both register and memory sets neither */
+            if (isreg && !ismem) {
+                tp.modreg = 1; /* mod == 0b11 */
+                break;
+            } else if (!isreg && ismem) {
+                tp.modmem = 1; /* mod != 0b11 */
+                break;
+            }
+        }
+    }
+
+    /* explicit second opcode byte has mod == 0b11 */
+    if (d->opm[1] == 0xff && (d->opc[1] & 0xc0) == 0xc0 &&
+        !tp.modreg && !tp.modmem)
+    {
+        tp.modreg = 1;
+    }
+
+    return tp;
+}
+
+/*
+ * Record, per (type, prefix, map, first opcode byte), whether any record
+ * uses the modrm.reg /n function (bitmap modfun) and whether any record
+ * constrains modrm.mod to register or memory (bitmap modmod), so that
+ * opcodes with clashes can expand their mod entries later.
+ *
+ * The idx argument is not referenced by this function.
+ */
+static void x86_build_prefix_clashes(x86_acc_idx *idx, x86_table_idx tab,
+    ullong *modfun, ullong *modmod)
+{
+    for (size_t n = 0; n < tab.count; n++) {
+        const x86_opc_data *rec = x86_opc_table + tab.idx[n];
+        const x86_opr_data *operands = x86_opr_table + rec->opr;
+        const x86_ord_data *order = x86_ord_table + rec->ord;
+        x86_opc_prefix tp = x86_table_make_prefix(rec, operands, order);
+
+        /* bitmap position is the acceleration page joined with opcode */
+        uint enc_type = x86_enc_type(rec->enc) >> x86_enc_t_shift;
+        uint enc_prefix = x86_enc_prefix(rec->enc) >> x86_enc_p_shift;
+        uint enc_map = x86_enc_map(rec->enc) >> x86_enc_m_shift;
+        size_t page = x86_acc_page(enc_type, enc_prefix, enc_map);
+        size_t bit = (page << 8) | rec->opc[0];
+
+        if (tp.modfun) {
+            x86_bitmap_set(modfun, bit, 1);
+        }
+        if (tp.modreg || tp.modmem) {
+            x86_bitmap_set(modmod, bit, 1);
+        }
+    }
+}
+
+/*
+ * Add a record to the opcode map at position idx, expanding it into one
+ * entry per required modrm.mod value:
+ *
+ *   modreg - one entry with mod == 0b11 (ModRM.rm is register)
+ *   modmem - three entries with mod != 0b11 (ModRM.rm is memory)
+ *   modcla - four entries, mod 0b11 down to 0b00, due to function clash
+ *   none   - the record is stored unmodified
+ *
+ * modreg takes precedence over modmem, which takes precedence over
+ * modcla. When op_map is NULL nothing is stored, which lets the caller
+ * use the function for counting only.
+ *
+ * Returns the number of map entries the record occupies.
+ */
+static size_t x86_add_opc_data(x86_opc_data *op_map, size_t idx,
+    x86_opc_data rec, uint modreg, uint modmem, uint modcla)
+{
+    /* mod field values (bits 7:6 of the second opcode byte) in store order */
+    static const unsigned char mod_reg[] = { 0xc0 };
+    static const unsigned char mod_mem[] = { 0x80, 0x40, 0x00 };
+    static const unsigned char mod_all[] = { 0xc0, 0x80, 0x40, 0x00 };
+
+    const unsigned char *mods = NULL;
+    size_t nent = 1;
+
+    if (modreg) {
+        mods = mod_reg;
+        nent = 1;
+    } else if (modmem) {
+        mods = mod_mem;
+        nent = 3;
+    } else if (modcla) {
+        mods = mod_all;
+        nent = 4;
+    }
+
+    if (!op_map) {
+        return nent;
+    }
+
+    if (!mods) {
+        /* add entry unmodified */
+        op_map[idx] = rec;
+        return nent;
+    }
+
+    /* make the mod bits significant, then emit one entry per value */
+    rec.opm[1] |= 0xc0;
+    for (size_t k = 0; k < nent; k++) {
+        rec.opc[1] = (rec.opc[1] & 0x3f) | mods[k];
+        op_map[idx + k] = rec;
+    }
+    return nent;
+}
+
+/*
+ * Build the opcode map with synthesized prefixes and modrm expansion.
+ *
+ * Every record in 'tab' is emitted once with its base synthesized prefix,
+ * then again with the rexw variant if pfx_w is set, and again with the 66
+ * variant if pfx_o is set. Entries are written starting at map position 1.
+ *
+ * With op_map == NULL nothing is written and only the size is computed;
+ * if 'count' is non-NULL it receives the final position, i.e. the number
+ * of map slots including slot zero. The idx argument is not referenced.
+ */
+static void x86_build_prefix_table(x86_acc_idx *idx,
+    x86_table_idx tab, x86_opc_data *op_map, size_t *count,
+    ullong *modfun, ullong *modmod)
+{
+    size_t pos = 1;
+
+    for (size_t i = 0; i < tab.count; i++) {
+        const x86_opc_data *src = x86_opc_table + tab.idx[i];
+        const x86_opr_data *operands = x86_opr_table + src->opr;
+        const x86_ord_data *order = x86_ord_table + src->ord;
+
+        uint enc_type = x86_enc_type(src->enc) >> x86_enc_t_shift;
+        uint enc_prefix = x86_enc_prefix(src->enc) >> x86_enc_p_shift;
+        uint enc_map = x86_enc_map(src->enc) >> x86_enc_m_shift;
+        size_t page = x86_acc_page(enc_type, enc_prefix, enc_map);
+        size_t bit = (page << 8) | src->opc[0];
+
+        /* a clash is a /n function and a mod constraint on one opcode */
+        uint modcla = x86_bitmap_get(modfun, bit) &&
+                      x86_bitmap_get(modmod, bit);
+        x86_opc_prefix tp = x86_table_make_prefix(src, operands, order);
+
+        /* base entry, always present */
+        x86_opc_data rec = *src;
+        rec.enc |= tp.pfx;
+        pos += x86_add_opc_data(op_map, pos, rec,
+            tp.modreg, tp.modmem, modcla);
+
+        /* optional rexw variant */
+        if (tp.pfx_w) {
+            rec = *src;
+            rec.enc |= tp.pfx | tp.pfx_w;
+            pos += x86_add_opc_data(op_map, pos, rec,
+                tp.modreg, tp.modmem, modcla);
+        }
+
+        /* optional operand size (66) variant */
+        if (tp.pfx_o) {
+            rec = *src;
+            rec.enc |= tp.pfx | tp.pfx_o;
+            pos += x86_add_opc_data(op_map, pos, rec,
+                tp.modreg, tp.modmem, modcla);
+        }
+    }
+
+    if (count != NULL) {
+        *count = pos;
+    }
+}
+
+/*
+ * Allocate page offsets for the type, prefix and map combinations that
+ * occur in the opcode map.
+ *
+ * An offset of zero means the slice is not allocated, except that page
+ * zero is preallocated as a special case. Pages are numbered in order of
+ * first appearance while scanning the map from entry 1.
+ *
+ * Returns the number of acceleration table entries needed, which is 256
+ * per allocated page.
+ */
+static size_t x86_build_accel_offsets(x86_acc_idx *idx)
+{
+    size_t next_page = 1;
+
+    for (size_t i = 1; i < idx->map_count; i++) {
+        const x86_opc_data *rec = idx->map + i;
+        uint enc_type = x86_enc_type(rec->enc) >> x86_enc_t_shift;
+        uint enc_prefix = x86_enc_prefix(rec->enc) >> x86_enc_p_shift;
+        uint enc_map = x86_enc_map(rec->enc) >> x86_enc_m_shift;
+        size_t acc_page = x86_acc_page(enc_type, enc_prefix, enc_map);
+
+        /* skip page zero and pages that already have an offset */
+        if (acc_page == 0 || idx->page_offsets[acc_page] != 0) {
+            continue;
+        }
+        idx->page_offsets[acc_page] = next_page;
+        next_page++;
+    }
+    return next_page << 8;
+}
+
+/*
+ * Fill in the acceleration table, which holds the range of map entries
+ * for each opcode:
+ *
+ * (type, prefix, map, opcode) -> (index, count)
+ *
+ * For each map entry starting at 1, every consecutive opcode value from
+ * opc[0] upwards that still equals opc[0] under mask opm[0] gets its
+ * count incremented, and its index set to this entry if it was unset.
+ */
+static void x86_build_accel_table(x86_acc_idx *idx, x86_acc_entry *acc)
+{
+    for (size_t i = 1; i < idx->map_count; i++) {
+        const x86_opc_data *rec = idx->map + i;
+        uint enc_type = x86_enc_type(rec->enc) >> x86_enc_t_shift;
+        uint enc_prefix = x86_enc_prefix(rec->enc) >> x86_enc_p_shift;
+        uint enc_map = x86_enc_map(rec->enc) >> x86_enc_m_shift;
+        size_t acc_page = x86_acc_page(enc_type, enc_prefix, enc_map);
+        size_t offset = x86_acc_offset(idx, acc_page);
+        uint base = rec->opc[0];
+        uint mask = rec->opm[0];
+
+        for (uint opc = base; (opc & mask) == base; opc++) {
+            /* the first map entry seen for an opcode becomes its index */
+            if (acc[offset + opc].idx == 0) {
+                acc[offset + opc].idx = i;
+            }
+            acc[offset + opc].nent++;
+        }
+    }
+}
+
+/*
+ * Build the opcode map and its acceleration index for the given modes.
+ *
+ * The opcode table is filtered by mode and sorted numerically, the map
+ * is sized with a counting pass and then filled, sorted with
+ * x86_opc_data_compare_build, and finally the page offsets and the
+ * acceleration table are generated from it.
+ *
+ * Returns a newly allocated x86_acc_idx.
+ */
+static x86_acc_idx *x86_table_build(uint modes)
+{
+    x86_acc_idx *idx = g_malloc0(sizeof(x86_acc_idx));
+
+    x86_table_idx tab = x86_opc_table_identity();
+    tab = x86_opc_table_filter(tab, modes);
+    tab = x86_opc_table_sorted(tab, x86_sort_numeric);
+
+    /* one bit per (t,p,m) page (512) and opcode (256), sized in bytes */
+    size_t bmap_size = (512 * 256) >> 3;
+    ullong *modfun = g_malloc0(bmap_size);
+    ullong *modmod = g_malloc0(bmap_size);
+    x86_build_prefix_clashes(idx, tab, modfun, modmod);
+
+    /* first pass counts the entries, second pass stores them */
+    x86_build_prefix_table(idx, tab, NULL, &idx->map_count, modfun, modmod);
+    idx->map = g_malloc0(idx->map_count * sizeof(x86_opc_data));
+    x86_build_prefix_table(idx, tab, idx->map, NULL, modfun, modmod);
+    qsort(idx->map, idx->map_count, sizeof(x86_opc_data),
+        x86_opc_data_compare_build);
+
+    idx->page_offsets = g_malloc0(/*t,p,m*/ 512);
+    idx->acc_count = x86_build_accel_offsets(idx);
+    idx->acc = g_malloc0(idx->acc_count * sizeof(x86_acc_entry));
+    x86_build_accel_table(idx, idx->acc);
+
+    /* scratch data is no longer needed once the index is built */
+    g_free(modmod);
+    g_free(modfun);
+    g_free(tab.idx);
+    return idx;
+}
+
+/*
+ *  table lookup
+ */
+
+/*
+ * Binary search idx->map (idx->map_count entries) for key m using
+ * x86_opc_data_compare_masked.
+ *
+ * Returns a pointer to the first entry that does not compare less than
+ * the key; this is idx->map + idx->map_count when every entry compares
+ * less.
+ */
+static x86_opc_data *x86_table_lookup_slow(x86_acc_idx *idx,
+    const x86_opc_data *m)
+{
+    size_t lo = 0;
+    size_t span = idx->map_count;
+
+    while (span != 0) {
+        size_t half = span >> 1;
+        size_t mid = lo + half;
+
+        if (x86_opc_data_compare_masked(m, idx->map + mid) > 0) {
+            /* key sorts after the probe: continue in the upper part */
+            lo = mid + 1;
+            span -= half + 1;
+        } else {
+            span = half;
+        }
+    }
+    return idx->map + lo;
+}
+
+/*
+ * Look up key m in the opcode map.
+ *
+ * The acceleration entry for the key's (type, prefix, map, first opcode
+ * byte) narrows the search to a slice of the map, which is then searched
+ * with x86_table_lookup_slow.
+ */
+x86_opc_data *x86_table_lookup(x86_acc_idx *idx, const x86_opc_data *m)
+{
+    uint enc_type = x86_enc_type(m->enc) >> x86_enc_t_shift;
+    uint enc_prefix = x86_enc_prefix(m->enc) >> x86_enc_p_shift;
+    uint enc_map = x86_enc_map(m->enc) >> x86_enc_m_shift;
+
+    size_t page = x86_acc_page(enc_type, enc_prefix, enc_map);
+    size_t slot = x86_acc_offset(idx, page) + m->opc[0];
+    x86_acc_entry *range = x86_acc_lookup(idx, slot);
+
+    /* temporary index covering only this opcode's slice of the map */
+    x86_acc_idx slice = { range->nent, idx->map + range->idx };
+    return x86_table_lookup_slow(&slice, m);
+}
+
+/*
+ * table printing utilities
+ */
+
+/*
+ * Create a table column of the given width holding a copy of 'data'.
+ *
+ * The copy is made with g_strdup so that it pairs with the g_free call
+ * in x86_print_row, which releases the column text after printing.
+ */
+static x86_table_col x86_new_column(int width, char *data)
+{
+    x86_table_col col = { width, g_strdup(data) };
+    return col;
+}
+
+/*
+ * Print one table row to stdout as '|'-separated, left-aligned cells
+ * padded to each column's width, followed by a newline. The text of each
+ * column is released with g_free once it has been printed.
+ */
+static void x86_print_row(size_t count, x86_table_col *cols)
+{
+    x86_table_col *col = cols;
+
+    printf("|");
+    for (size_t left = count; left > 0; left--, col++) {
+        printf(" %-*s |", col->width, col->data);
+        g_free(col->data);
+    }
+    printf("\n");
+}
+
+/*
+ * Format the encoding of opcode record d into buf as text.
+ *
+ * The output is the name of the leading encoding bits, the first opcode
+ * byte (with "+r" for register-in-opcode forms), a rendering of the
+ * encoding function ("/r", "/n" or the second opcode byte), and then the
+ * names of the immediate, second immediate and suffix fields when set.
+ *
+ * Returns the accumulated length reported by the formatting calls.
+ */
+static size_t x86_format_enc(char *buf, size_t buflen, const x86_opc_data *d)
+{
+    const uint suffix = x86_enc_suffix(d->enc);
+    const uint imm = x86_enc_imm(d->enc);
+    const uint imm2 = x86_enc_imm2(d->enc);
+    const uint lead = x86_enc_leading(d->enc);
+    size_t pos = 0;
+
+    pos += x86_enc_name(buf + pos, buflen - pos, lead);
+
+    /* first opcode byte */
+    if (x86_enc_opcode(lead) == x86_enc_o_opcode_r) {
+        pos += snprintf(buf + pos, buflen - pos, " %02hhx+r", d->opc[0]);
+    } else {
+        pos += snprintf(buf + pos, buflen - pos, " %02hhx", d->opc[0]);
+    }
+
+    /* function of the second opcode byte */
+    const uint func = x86_enc_func(lead);
+    if (func == x86_enc_f_modrm_r) {
+        pos += snprintf(buf + pos, buflen - pos, " /r");
+    } else if (func == x86_enc_f_modrm_n) {
+        pos += snprintf(buf + pos, buflen - pos, " /%d",
+            (d->opc[1] >> 3) & 7);
+    } else if (func == x86_enc_f_opcode_r) {
+        pos += snprintf(buf + pos, buflen - pos, " %02hhx+r", d->opc[1]);
+    } else if (func == x86_enc_f_opcode) {
+        pos += snprintf(buf + pos, buflen - pos, " %02hhx", d->opc[1]);
+    }
+
+    /* trailing fields in output order: immediate, immediate 2, suffix */
+    const uint tail[] = { imm, imm2, suffix };
+    for (size_t k = 0; k < sizeof(tail) / sizeof(tail[0]); k++) {
+        if (tail[k]) {
+            pos += x86_enc_name(buf + pos, buflen - pos, tail[k]);
+        }
+    }
+
+    return pos;
+}
+
+/*
+ * Print one opcode record as a table row on stdout.
+ *
+ * d is the record to print. When 'compact' is non-zero the row holds the
+ * mnemonic only plus an operand order mnemonic column; otherwise it holds
+ * the mnemonic with its operand names plus a full operand order column.
+ * When 'opcode' is non-zero two extra columns show the opcode bytes and
+ * the opcode mask bytes in hex. The encoding and mode columns are always
+ * present.
+ *
+ * Each column is formatted into the shared scratch buffer 'buf' and then
+ * copied by x86_new_column, so the buffer is reset before every column.
+ */
+void x86_print_op(const x86_opc_data *d, uint compact, uint opcode)
+{
+    char buf[256];
+    /* at most six columns are produced by any flag combination below */
+    x86_table_col cols[6];
+    size_t count = 0, buflen = sizeof(buf), len;
+
+    const x86_opr_data *o = x86_opr_table + d->opr;
+    const x86_ord_data *p = x86_ord_table + d->ord;
+
+    /* mnemonic column, with operand names in the non-compact form */
+    buf[(len = 0)] = '\0';
+    if (compact) {
+        len += snprintf(buf + len, buflen - len, "%s", x86_op_names[d->op]);
+        cols[count++] = x86_new_column(18, buf);
+    } else {
+        len += snprintf(buf + len, buflen - len, "%s ", x86_op_names[d->op]);
+        for (size_t i = 0; i < array_size(o->opr) && o->opr[i]; i++) {
+            if (i != 0) len += snprintf(buf + len, buflen - len, ",");
+            len += x86_opr_name(buf + len, buflen - len, o->opr[i]);
+        }
+        cols[count++] = x86_new_column(52, buf);
+    }
+
+    /* optional opcode bytes and opcode mask columns */
+    if (opcode) {
+        buf[(len = 0)] = '\0';
+        len += snprintf(buf + len, buflen - len, "%02hhx %02hhx",
+            d->opc[0], d->opc[1]);
+        cols[count++] = x86_new_column(5, buf);
+        buf[(len = 0)] = '\0';
+        len += snprintf(buf + len, buflen - len, "%02hhx %02hhx",
+            d->opm[0], d->opm[1]);
+        cols[count++] = x86_new_column(5, buf);
+    }
+
+    /* operand order mnemonic, compact form only */
+    if (compact) {
+        buf[(len = 0)] = '\0';
+        len += x86_ord_mnem(buf + len, buflen - len, p->ord);
+        cols[count++] = x86_new_column(4, buf);
+    }
+
+    /* encoding column */
+    buf[(len = 0)] = '\0';
+    len += x86_format_enc(buf, buflen - len, d);
+    cols[count++] = x86_new_column(31, buf);
+
+    /* full operand order column, non-compact form only */
+    if (!compact) {
+        buf[(len = 0)] = '\0';
+        for (size_t i = 0; i < array_size(p->ord) && p->ord[i]; i++) {
+            if (i != 0) len += snprintf(buf + len, buflen - len, ",");
+            len += x86_ord_name(buf + len, buflen - len, p->ord[i], "/");
+        }
+        cols[count++] = x86_new_column(23, buf);
+    }
+
+    /* mode column */
+    buf[(len = 0)] = '\0';
+    len += x86_mode_name(buf + len, buflen - len, d->mode, "/");
+    cols[count++] = x86_new_column(8, buf);
+
+    x86_print_row(count, cols);
+}
+
+/*
+ * encoding / decoding
+ */
+
+/*
+ * Serialize the instruction described by codec c into buf.
+ *
+ * Bytes are emitted in this order: segment prefix, other legacy prefixes
+ * (osize, asize, wait, lock, rep, repne), extended prefix (REX, REX2,
+ * VEX2, VEX3 or EVEX), map escape bytes, opcode bytes, ModRM with
+ * optional SIB and displacement, immediate, and second immediate.
+ *
+ * The number of bytes written is accumulated from the x86_out* return
+ * values and stored in *len. The function always returns 0, and ctx is
+ * not referenced in the body.
+ */
+int x86_codec_write(x86_ctx *ctx, x86_buffer *buf, x86_codec c, size_t *len)
+{
+    size_t nbytes = 0;
+
+    /* segment prefix */
+    switch (c.seg) {
+    case x86_seg_es: nbytes += x86_out8(buf, x86_pb_es); break;
+    case x86_seg_cs: nbytes += x86_out8(buf, x86_pb_cs); break;
+    case x86_seg_ss: nbytes += x86_out8(buf, x86_pb_ss); break;
+    case x86_seg_ds: nbytes += x86_out8(buf, x86_pb_ds); break;
+    case x86_seg_fs: nbytes += x86_out8(buf, x86_pb_fs); break;
+    case x86_seg_gs: nbytes += x86_out8(buf, x86_pb_gs); break;
+    }
+
+    /* other prefixes */
+    if (x86_codec_has_osize(&c)) {
+        nbytes += x86_out8(buf, x86_pb_osize);
+    }
+    if (x86_codec_has_asize(&c)) {
+        nbytes += x86_out8(buf, x86_pb_asize);
+    }
+    if (x86_codec_has_wait(&c)) {
+        nbytes += x86_out8(buf, x86_pb_wait);
+    }
+    if (x86_codec_has_lock(&c)) {
+        nbytes += x86_out8(buf, x86_pb_lock);
+    }
+    if (x86_codec_has_rep(&c)) {
+        nbytes += x86_out8(buf, x86_pb_rep);
+    }
+    if (x86_codec_has_repne(&c)) {
+        nbytes += x86_out8(buf, x86_pb_repne);
+    }
+
+    /* extended prefixes */
+    switch (x86_codec_field_ce(&c) >> x86_ce_shift) {
+        case x86_ce_rex >> x86_ce_shift:
+            /* REX emits only its data byte, with no separate lead byte */
+            nbytes += x86_out8(buf, c.rex.data[0]);
+            break;
+        case x86_ce_rex2 >> x86_ce_shift:
+            nbytes += x86_out8(buf, x86_pb_rex2);
+            nbytes += x86_out8(buf, c.rex2.data[0]);
+            break;
+        case x86_ce_vex2 >> x86_ce_shift:
+            nbytes += x86_out8(buf, x86_pb_vex2);
+            nbytes += x86_out8(buf, c.vex2.data[0]);
+            break;
+        case x86_ce_vex3 >> x86_ce_shift:
+            nbytes += x86_out8(buf, x86_pb_vex3);
+            nbytes += x86_out8(buf, c.vex3.data[0]);
+            nbytes += x86_out8(buf, c.vex3.data[1]);
+            break;
+        case x86_ce_evex >> x86_ce_shift:
+            nbytes += x86_out8(buf, x86_pb_evex);
+            nbytes += x86_out8(buf, c.evex.data[0]);
+            nbytes += x86_out8(buf, c.evex.data[1]);
+            nbytes += x86_out8(buf, c.evex.data[2]);
+            break;
+    }
+
+    /* map */
+    switch (x86_codec_field_cm(&c) >> x86_cm_shift) {
+    case x86_cm_none >> x86_cm_shift:
+        break;
+    case x86_cm_0f   >> x86_cm_shift:
+        nbytes += x86_out8(buf, 0x0f);
+        break;
+    case x86_cm_0f38 >> x86_cm_shift:
+        /* NOTE(review): presumably x86_out16 writes little-endian,
+         * giving the byte sequence 0f 38 - confirm */
+        nbytes += x86_out16(buf, 0x380f);
+        break;
+    case x86_cm_0f3a >> x86_cm_shift:
+        nbytes += x86_out16(buf, 0x3a0f);
+        break;
+    }
+
+    /* opcode */
+    for (size_t i = 0; i < c.opclen; i++) {
+        nbytes += x86_out8(buf, c.opc[i]);
+    }
+
+    /* ModRM and SIB */
+    int b;
+    if (x86_codec_has_modrm(&c)) {
+        nbytes += x86_out8(buf, c.modrm.data[0]);
+
+        uchar rm = x86_modrm_rm(c.modrm.data[0]);
+        uchar mod = x86_modrm_mod(c.modrm.data[0]);
+
+        /* first switch emits the SIB byte, the second the displacement */
+        switch (mod) {
+        case x86_mod_disp0:
+        case x86_mod_disp8:
+        case x86_mod_dispw:
+            /* there is no SIB in real mode */
+            if (!x86_codec_is16(&c) && rm == x86_rm_sp_sib) {
+                nbytes += x86_out8(buf, c.sib.data[0]);
+            }
+            break;
+        case x86_mod_reg: break;
+        }
+        switch (mod) {
+        case x86_mod_disp0:
+            /* special case for rm/b == bp */
+            b = x86_sib_b(c.sib.data[0]);
+            if (rm == x86_rm_bp_disp0 ||
+                (rm == x86_rm_sp_sib && b == x86_rm_bp_disp0))
+            {
+                if (x86_codec_is16(&c)) {
+                    nbytes += x86_out16(buf, (u16)c.disp32);
+                } else {
+                    /* this is RIP-relative in amd64 mode */
+                    nbytes += x86_out32(buf, (u32)c.disp32);
+                }
+            }
+            break;
+        case x86_mod_disp8:
+            nbytes += x86_out8(buf, (u8)c.disp32);
+            break;
+        case x86_mod_dispw:
+            /* both branches break out of the switch */
+            if (x86_codec_is16(&c)) {
+                nbytes += x86_out16(buf, (u16)c.disp32); break;
+            } else {
+                nbytes += x86_out32(buf, (u32)c.disp32); break;
+            }
+        case x86_mod_reg: break;
+        }
+    }
+
+    /* immediate */
+    switch (x86_codec_field_ci(&c) >> x86_ci_shift) {
+    case x86_ci_iw >> x86_ci_shift:
+        /* 16-bit when exactly one of 16-bit mode and osize is in effect */
+        if (x86_codec_is16(&c) ^ x86_codec_has_osize(&c)) {
+            nbytes += x86_out16(buf, (u16)c.imm32);
+        } else {
+            nbytes += x86_out32(buf, (u32)c.imm32);
+        }
+        break;
+    case x86_ci_iwd >> x86_ci_shift:
+        /* width follows the mode only; the osize prefix is not consulted */
+        if (x86_codec_is16(&c)) {
+            nbytes += x86_out16(buf, (u16)c.imm32);
+        } else {
+            nbytes += x86_out32(buf, (u32)c.imm32);
+        }
+        break;
+    case x86_ci_ib >> x86_ci_shift:
+        nbytes += x86_out8(buf, (u8)c.imm32);
+        break;
+    case x86_ci_i16 >> x86_ci_shift:
+        nbytes += x86_out16(buf, (u16)c.imm32);
+        break;
+    case x86_ci_i32 >> x86_ci_shift:
+        nbytes += x86_out32(buf, (u32)c.imm32);
+        break;
+    case x86_ci_i64 >> x86_ci_shift:
+        nbytes += x86_out64(buf, (u64)c.imm64);
+        break;
+    }
+
+    /* additional immediate used by CALLF/JMPF/ENTER */
+    switch (x86_codec_field_cj(&c) >> x86_cj_shift) {
+    case x86_cj_ib >> x86_cj_shift:
+        nbytes += x86_out8(buf, (u8)c.imm2);
+        break;
+    case x86_cj_i16 >> x86_cj_shift:
+        nbytes += x86_out16(buf, (u16)c.imm2);
+        break;
+    }
+
+    *len = nbytes;
+    return 0;
+}
+
+/*
+ * Decide whether opcode record d is a valid match for the partially
+ * decoded instruction in codec c.
+ *
+ * c - codec holding the current mode and the prefixes seen so far
+ * d - candidate opcode record
+ * w - width flag supplied by the caller; it is tested against the .wx
+ *     width encodings below
+ *
+ * The record is rejected when it is not available in the codec's mode,
+ * when the extended prefix filter for the codec's prefix type rejects it,
+ * or when its address size or operand size constraint does not agree
+ * with the mode and the address/operand size prefixes.
+ *
+ * Returns 0 when the record is acceptable and -1 otherwise.
+ */
+static int x86_filter_op(x86_codec *c, x86_opc_data *d, uint w)
+{
+    if (x86_codec_is16(c) && !x86_mode_has16(d->mode)) return -1;
+    if (x86_codec_is32(c) && !x86_mode_has32(d->mode)) return -1;
+    if (x86_codec_is64(c) && !x86_mode_has64(d->mode)) return -1;
+
+    switch (x86_codec_field_ce(c) >> x86_ce_shift) {
+    case x86_ce_rex >> x86_ce_shift:
+        if (x86_enc_filter_rex(c->rex, d->enc) < 0) return -1;
+        break;
+    case x86_ce_rex2 >> x86_ce_shift:
+        if (x86_enc_filter_rex2(c->rex2, d->enc) < 0) return -1;
+        break;
+    case x86_ce_vex2 >> x86_ce_shift:
+        if (x86_enc_filter_vex2(c->vex2, d->enc) < 0) return -1;
+        break;
+    case x86_ce_vex3 >> x86_ce_shift:
+        if (x86_enc_filter_vex3(c->vex3, d->enc) < 0) return -1;
+        break;
+    case x86_ce_evex >> x86_ce_shift:
+        if (x86_enc_filter_evex(c->evex, d->enc) < 0) return -1;
+        break;
+    }
+
+    /*
+     * Address size constraints are matched against the effective address
+     * size: the mode's default, switched by the address size prefix.
+     *
+     *   16-bit mode: a16, or a32 with the prefix
+     *   32-bit mode: a32, or a16 with the prefix
+     *   64-bit mode: a64, or a32 with the prefix
+     *
+     * Each test must be a single accept condition. Rejecting on
+     * "!is16 || !(is32 && asize)" rejects unconditionally because the
+     * two mode tests can never hold together.
+     */
+    if (x86_enc_has_a16(d->enc)) {
+        uint asize = x86_codec_has_asize(c);
+        if (!((x86_codec_is16(c) && !asize) ||
+              (x86_codec_is32(c) && asize))) return -1;
+    }
+    if (x86_enc_has_a32(d->enc)) {
+        uint asize = x86_codec_has_asize(c);
+        if (!((x86_codec_is32(c) && !asize) ||
+              ((x86_codec_is16(c) || x86_codec_is64(c)) && asize))) return -1;
+    }
+    if (x86_enc_has_a64(d->enc)) {
+        if (!x86_codec_is64(c) || x86_codec_has_asize(c)) return -1;
+    }
+
+    if (x86_enc_has_o16(d->enc)) {
+        switch (x86_enc_width(d->enc)) {
+        case x86_enc_w_ww:
+        case x86_enc_w_wx:
+            /* 16-bit operands need exactly one of 16-bit mode and osize */
+            if (!(x86_codec_is16(c) ^ x86_codec_has_osize(c)) || w) return -1;
+            break;
+        }
+    }
+    if (x86_enc_has_o32(d->enc)) {
+        switch (x86_enc_width(d->enc)) {
+        case x86_enc_w_ww:
+            /* .ww means no 32-bit operands in 64-bit mode */
+            if ((x86_codec_is16(c) ^ x86_codec_has_osize(c)) ||
+                 x86_codec_is64(c)) return -1;
+            break;
+        case x86_enc_w_wx:
+            if ((x86_codec_is16(c) ^ x86_codec_has_osize(c)) || w) return -1;
+            break;
+        }
+    }
+    if (x86_enc_has_o64(d->enc)) {
+        switch (x86_enc_width(d->enc)) {
+        case x86_enc_w_ww:
+            /* .ww means ignores W=1 in 64-bit mode */
+            if (!x86_codec_is64(c)) return -1;
+            break;
+        case x86_enc_w_wx:
+            if (!x86_codec_is64(c) || !w) return -1;
+            break;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Read the parts of an instruction that follow the ModRM byte: the SIB
+ * byte, the displacement, the immediate and the second immediate.
+ *
+ * buf - input buffer to read from
+ * c   - codec; its modrm byte must already be filled in. sib, disp32,
+ *       imm32/imm64, imm2 and the immediate flags are written here
+ * d   - matched opcode record, consulted for its immediate encodings
+ *
+ * Returns the number of bytes accounted for by these reads.
+ */
+static size_t x86_parse_encoding(x86_buffer *buf, x86_codec *c,
+    x86_opc_data *d)
+{
+    size_t nbytes = 0;
+
+    /* parse SIB and displacement */
+    int b;
+    if (x86_codec_has_modrm(c)) {
+        uchar rm = x86_modrm_rm(c->modrm.data[0]);
+        uchar mod = x86_modrm_mod(c->modrm.data[0]);
+        /* first switch reads the SIB byte, the second the displacement */
+        switch (mod) {
+        case x86_mod_disp0:
+        case x86_mod_disp8:
+        case x86_mod_dispw:
+            /* there is no SIB in real mode */
+            if (!x86_codec_is16(c) && rm == x86_rm_sp_sib) {
+                c->sib.data[0] = (u8)x86_in8(buf); nbytes += 1;
+            }
+            break;
+        case x86_mod_reg:
+            break;
+        }
+        switch (mod) {
+        case x86_mod_disp0:
+            /* special case for rm/b == bp */
+            b = x86_sib_b(c->sib.data[0]);
+            if (rm == x86_rm_bp_disp0 ||
+                (rm == x86_rm_sp_sib && b == x86_rm_bp_disp0))
+            {
+                if (x86_codec_is16(c)) {
+                    c->disp32 = (i16)x86_in16(buf); nbytes += 2;
+                } else {
+                    /* this is RIP-relative in amd64 mode */
+                    c->disp32 = (i32)x86_in32(buf); nbytes += 4;
+                }
+            }
+            break;
+        case x86_mod_disp8:
+            /* displacements are sign-extended through the signed casts */
+            c->disp32 = (i8)x86_in8(buf); nbytes += 1;
+            break;
+        case x86_mod_dispw:
+            if (x86_codec_is16(c)) {
+                c->disp32 = (i16)x86_in16(buf); nbytes += 2;
+            } else {
+                c->disp32 = (i32)x86_in32(buf); nbytes += 4;
+            }
+            /* fallthrough - the next case only breaks */
+        case x86_mod_reg:
+            break;
+        }
+    }
+
+    /* parse immediate */
+    switch (x86_enc_imm(d->enc) >> x86_enc_i_shift) {
+    case x86_enc_i_ib >> x86_enc_i_shift:
+        c->imm32 = (i8)x86_in8(buf); nbytes += 1;
+        c->flags |= x86_ci_ib;
+        break;
+    case x86_enc_i_iw >> x86_enc_i_shift:
+        /* 16-bit when exactly one of 16-bit mode and osize is in effect */
+        if (x86_codec_is16(c) ^ x86_codec_has_osize(c)) {
+            c->imm32 = (i16)x86_in16(buf); nbytes += 2;
+        } else {
+            c->imm32 = (i32)x86_in32(buf); nbytes += 4;
+        }
+        c->flags |= x86_ci_iw;
+        break;
+    case x86_enc_i_iwd >> x86_enc_i_shift:
+        /* width follows the mode only; the osize prefix is not consulted */
+        if (x86_codec_is16(c)) {
+            c->imm32 = (i16)x86_in16(buf); nbytes += 2;
+        } else {
+            c->imm32 = (i32)x86_in32(buf); nbytes += 4;
+        }
+        c->flags |= x86_ci_iwd;
+        break;
+    case x86_enc_i_i16 >> x86_enc_i_shift:
+        c->imm32 = (i16)x86_in16(buf);  nbytes += 2;
+        c->flags |= x86_ci_i16;
+        break;
+    case x86_enc_i_i32 >> x86_enc_i_shift:
+        c->imm32 = (i32)x86_in32(buf);  nbytes += 4;
+        c->flags |= x86_ci_i32;
+        break;
+    case x86_enc_i_i64 >> x86_enc_i_shift:
+        c->imm64 = (i64)x86_in64(buf);  nbytes += 8;
+        c->flags |= x86_ci_i64;
+        break;
+    }
+
+    /* additional immediate used by CALLF/JMPF/ENTER */
+    switch (x86_enc_imm2(d->enc) >> x86_enc_j_shift) {
+    case x86_enc_j_ib >> x86_enc_j_shift:
+        c->imm2 = (i8)x86_in8(buf); nbytes += 1;
+        c->flags |= x86_cj_ib;
+        break;
+    case x86_enc_j_i16 >> x86_enc_j_shift:
+        c->imm2 = (i16)x86_in16(buf); nbytes += 2;
+        c->flags |= x86_cj_i16;
+        break;
+    }
+
+    return nbytes;
+}
+
+/*
+ * bits of an encoding word that take part in the table key: the type
+ * field, the prefix field together with its rexw flag, and the map field.
+ */
+enum {
+    x86_enc_tpm_mask = x86_enc_m_mask
+                     | x86_enc_prexw_mask
+                     | x86_enc_t_mask
+};
+
+/*
+ * find the first opcode record that matches key k and passes the operand
+ * filter, with the rexw flag of the key forced to w.
+ *
+ * x86_table_lookup supplies the starting record. records are scanned
+ * forward for as long as they still compare equal to the key under the
+ * masked comparison, and the first one accepted by x86_filter_op is
+ * returned.
+ *
+ * returns NULL when there is no such record, including when the scan
+ * reaches the end of the table.
+ */
+static x86_opc_data *x86_table_match(x86_ctx *ctx, x86_codec *c,
+    x86_opc_data k, int w)
+{
+    const x86_opc_data *end = ctx->idx->map + ctx->idx->map_count;
+    x86_opc_data *r;
+
+    /* key is type+prefix+map with substituted rexw=w flag */
+    k.enc = ((k.enc & ~x86_enc_p_rexw) |
+             (-w    &  x86_enc_p_rexw)) & x86_enc_tpm_mask;
+    x86_debugf("table_lookup { type:%x prefix:%x map:%x "
+        "opc:[%02hhx %02hhx] opm:[%02hhx %02hhx] }",
+        (k.enc & x86_enc_t_mask) >> x86_enc_t_shift,
+        (k.enc & x86_enc_p_mask) >> x86_enc_p_shift,
+        (k.enc & x86_enc_m_mask) >> x86_enc_m_shift,
+        k.opc[0], k.opc[1], k.opm[0], k.opm[1]);
+
+    r = x86_table_lookup(ctx->idx, &k);
+    if (!r) {
+        /* defensive: the lookup helper is not visible from here */
+        return NULL;
+    }
+
+    for (; r < end; r++) {
+        /* substitute suffix of record for precise match */
+        k.enc = ((k.enc & x86_enc_tpm_mask) |
+                  (r->enc & ~x86_enc_tpm_mask));
+        x86_debugf("checking opdata %zu", (size_t)(r - ctx->idx->map));
+        if (debug) {
+            x86_print_op(r, 1, 1);
+        }
+        if (x86_opc_data_compare_masked(&k, r) != 0) {
+            x86_debugf("** no matches");
+            return NULL;
+        }
+        if (x86_filter_op(c, r, w) == 0) {
+            return r;
+        }
+    }
+
+    /*
+     * every remaining record was rejected by the filter. report a miss
+     * instead of handing the caller a pointer one past the last record.
+     */
+    return NULL;
+}
+
+/*
+ * decode one instruction from buf into c.
+ *
+ * a byte-driven state machine consumes segment overrides, legacy prefixes
+ * and the REX/REX2/VEX/EVEX prefix, collecting the table type (t), map (m),
+ * W bit (w) and embedded prefix (p) as it goes. the opcode byte and the
+ * byte following it are then matched against the opcode table: the W=1
+ * record is tried first when w is set, then the W=0/WIG record, and the
+ * whole lookup is repeated without the prefix bits if nothing matched.
+ * x86_parse_encoding finally consumes the SIB, displacement and immediate
+ * bytes described by the matched record.
+ *
+ * on success c->rec is the index of the matched record, *len is the number
+ * of bytes consumed and 0 is returned. on failure the bytes read so far
+ * are handed back to the buffer, *len receives what remains of the count
+ * after that rewind, and -1 is returned.
+ *
+ * NOTE(review): bytes 0x40-0x4f are taken as a REX prefix in every mode
+ * here; confirm how ia32 INC/DEC are expected to decode.
+ */
+int x86_codec_read(x86_ctx *ctx, x86_buffer *buf, x86_codec *c, size_t *len)
+{
+    uint state = x86_state_top;
+    size_t nbytes = 0, limit = buf->end - buf->start;
+    uint t = 0, m = 0, w = 0, p = 0, l = 0, mode = ctx->mode;
+    x86_opc_data k = { 0 }, *r = NULL;
+    uchar b = 0, lastp = 0;
+
+    memset(c, 0, sizeof(*c));
+    switch (mode) {
+    case x86_modes_32: c->flags |= x86_cf_ia32; break;
+    case x86_modes_64: c->flags |= x86_cf_amd64; break;
+    default: break;
+    }
+
+    while (state != x86_state_done) {
+        /*
+         * stop once the buffer is used up. prefixes may repeat without
+         * bound, so without this check a run of prefix bytes keeps the
+         * loop reading past buf->end. a decode that gets here was going
+         * to fail the final nbytes <= limit test anyway.
+         */
+        if (nbytes >= limit) {
+            goto err;
+        }
+        nbytes += x86_buffer_read(buf, &b, 1);
+        switch (state) {
+        case x86_state_top:
+            switch (b) {
+            case 0x40: case 0x41: case 0x42: case 0x43:
+            case 0x44: case 0x45: case 0x46: case 0x47:
+            case 0x48: case 0x49: case 0x4a: case 0x4b:
+            case 0x4c: case 0x4d: case 0x4e: case 0x4f:
+                c->rex.data[0] = b;
+                c->flags |= x86_ce_rex;
+                w = (c->rex.data[0] >> 3) & 1;
+                t = x86_table_lex;
+                state = x86_state_rex_opcode;
+                break;
+            case x86_pb_26:
+            case x86_pb_2e:
+            case x86_pb_36:
+            case x86_pb_3e:
+            case x86_pb_64:
+            case x86_pb_65:
+                state = x86_state_segment;
+                goto segment_reparse;
+            case x86_pb_66:
+            case x86_pb_67:
+            case x86_pb_9b:
+            case x86_pb_f0:
+            case x86_pb_f2:
+            case x86_pb_f3:
+                state = x86_state_legacy;
+                goto legacy_reparse;
+            case x86_pb_62:
+                nbytes += x86_buffer_read(buf, c->evex.data, 3);
+                c->flags |= x86_ce_evex;
+                m = (c->evex.data[0] >> 0) & 7;
+                w = (c->evex.data[1] >> 7) & 1;
+                p = (c->evex.data[1] >> 0) & 3;
+                l = (c->evex.data[2] >> 5) & 3;
+                t = x86_table_evex;
+                state = x86_state_vex_opcode;
+                break;
+            case x86_pb_c4:
+                nbytes += x86_buffer_read(buf, c->vex3.data, 2);
+                c->flags |= x86_ce_vex3;
+                m = (c->vex3.data[0] >> 0) & 31;
+                w = (c->vex3.data[1] >> 7) & 1;
+                p = (c->vex3.data[1] >> 0) & 3;
+                l = (c->vex3.data[1] >> 2) & 1;
+                t = x86_table_vex;
+                state = x86_state_vex_opcode;
+                break;
+            case x86_pb_c5:
+                nbytes += x86_buffer_read(buf, c->vex2.data, 1);
+                c->flags |= x86_ce_vex2;
+                m = x86_map_0f;
+                p = (c->vex2.data[0] >> 0) & 3;
+                l = (c->vex2.data[0] >> 2) & 1;
+                t = x86_table_vex;
+                state = x86_state_vex_opcode;
+                break;
+            case x86_pb_d5:
+                nbytes += x86_buffer_read(buf, c->rex2.data, 1);
+                c->flags |= x86_ce_rex2;
+                m = (c->rex2.data[0] >> 7) & 1;
+                w = (c->rex2.data[0] >> 3) & 1;
+                t = x86_table_lex;
+                state = x86_state_lex_opcode;
+                break;
+            case 0x0f:
+                t = x86_table_lex;
+                state = x86_state_map_0f;
+                break;
+            default:
+                m = x86_map_none;
+                t = x86_table_lex;
+                state = x86_state_lex_opcode;
+                goto lex_reparse;
+            }
+            break;
+        case x86_state_segment: segment_reparse:
+            switch (b) {
+            case 0x40: case 0x41: case 0x42: case 0x43:
+            case 0x44: case 0x45: case 0x46: case 0x47:
+            case 0x48: case 0x49: case 0x4a: case 0x4b:
+            case 0x4c: case 0x4d: case 0x4e: case 0x4f:
+                c->rex.data[0] = b;
+                c->flags |= x86_ce_rex;
+                w = (c->rex.data[0] >> 3) & 1;
+                t = x86_table_lex;
+                state = x86_state_rex_opcode;
+                break;
+            case x86_pb_26:
+                c->seg = x86_seg_es; state = x86_state_legacy;
+                break;
+            case x86_pb_2e:
+                c->seg = x86_seg_cs; state = x86_state_legacy;
+                break;
+            case x86_pb_36:
+                c->seg = x86_seg_ss; state = x86_state_legacy;
+                break;
+            case x86_pb_3e:
+                c->seg = x86_seg_ds; state = x86_state_legacy;
+                break;
+            case x86_pb_64:
+                c->seg = x86_seg_fs; state = x86_state_legacy;
+                break;
+            case x86_pb_65:
+                c->seg = x86_seg_gs; state = x86_state_legacy;
+                break;
+            case x86_pb_66:
+            case x86_pb_67:
+            case x86_pb_9b:
+            case x86_pb_f0:
+            case x86_pb_f2:
+            case x86_pb_f3:
+                state = x86_state_legacy;
+                goto legacy_reparse;
+            case x86_pb_62:
+            case x86_pb_c4:
+            case x86_pb_c5:
+            case x86_pb_d5:
+                goto err;
+            case 0x0f:
+                t = x86_table_lex;
+                state = x86_state_map_0f;
+                break;
+            default:
+                m = x86_map_none;
+                t = x86_table_lex;
+                state = x86_state_lex_opcode;
+                goto lex_reparse;
+            }
+            break;
+        case x86_state_legacy: legacy_reparse:
+            switch (b) {
+            case 0x40: case 0x41: case 0x42: case 0x43:
+            case 0x44: case 0x45: case 0x46: case 0x47:
+            case 0x48: case 0x49: case 0x4a: case 0x4b:
+            case 0x4c: case 0x4d: case 0x4e: case 0x4f:
+                c->rex.data[0] = b;
+                c->flags |= x86_ce_rex;
+                w = (c->rex.data[0] >> 3) & 1;
+                t = x86_table_lex;
+                state = x86_state_rex_opcode;
+                break;
+            case x86_pb_26:
+            case x86_pb_2e:
+            case x86_pb_36:
+            case x86_pb_3e:
+            case x86_pb_64:
+            case x86_pb_65:
+            case x86_pb_62:
+            case x86_pb_c4:
+            case x86_pb_c5:
+            case x86_pb_d5:
+                goto err;
+            case x86_pb_66:
+                lastp = b;
+                c->flags |= x86_cp_osize;
+                break;
+            case x86_pb_67:
+                lastp = b;
+                c->flags |= x86_cp_asize;
+                break;
+            case x86_pb_9b:
+                lastp = b;
+                c->flags |= x86_cp_wait;
+                break;
+            case x86_pb_f0:
+                lastp = b;
+                c->flags |= x86_cp_lock;
+                break;
+            case x86_pb_f2:
+                lastp = b;
+                c->flags |= x86_cp_repne;
+                break;
+            case x86_pb_f3:
+                lastp = b;
+                c->flags |= x86_cp_rep;
+                break;
+            case 0x0f:
+                t = x86_table_lex;
+                state = x86_state_map_0f;
+                break;
+            default:
+                m = x86_map_none;
+                t = x86_table_lex;
+                state = x86_state_lex_opcode;
+                goto lex_reparse;
+            }
+            break;
+        case x86_state_rex_opcode:
+            switch (b) {
+            case 0x0f:
+                state = x86_state_map_0f;
+                break;
+            default:
+                state = x86_state_lex_opcode;
+                goto lex_reparse;
+            }
+            break;
+        case x86_state_map_0f:
+            switch (b) {
+            case 0x38:
+                c->flags |= x86_cm_0f38;
+                m = x86_map_0f38;
+                state = x86_state_lex_opcode;
+                break;
+            case 0x3a:
+                c->flags |= x86_cm_0f3a;
+                m = x86_map_0f3a;
+                state = x86_state_lex_opcode;
+                break;
+            default:
+                c->flags |= x86_cm_0f;
+                m = x86_map_0f;
+                state = x86_state_lex_opcode;
+                goto lex_reparse;
+            }
+            break;
+        case x86_state_lex_opcode: lex_reparse:
+            /* b is the opcode byte; the last legacy prefix seen is the key */
+            k.enc |= ((t << x86_enc_t_shift) & x86_enc_t_mask)
+                  |  ((m << x86_enc_m_shift) & x86_enc_m_mask);
+            switch (lastp) {
+            case 0x66: k.enc |= x86_enc_p_66; break;
+            case 0x9b: k.enc |= x86_enc_p_9b; break;
+            case 0xf2: k.enc |= x86_enc_p_f2; break;
+            case 0xf3: k.enc |= x86_enc_p_f3; break;
+            }
+            state = x86_state_done;
+            break;
+        case x86_state_vex_opcode:
+            /* b is the opcode byte; the prefix comes from the pp field */
+            k.enc |= ((t << x86_enc_t_shift) & x86_enc_t_mask)
+                  |  ((m << x86_enc_m_shift) & x86_enc_m_mask);
+            switch (p) {
+            case x86_pfx_66: k.enc |= x86_enc_p_66; break;
+            case x86_pfx_f2: k.enc |= x86_enc_p_f2; break;
+            case x86_pfx_f3: k.enc |= x86_enc_p_f3; break;
+            }
+            state = x86_state_done;
+            (void)l; /* l can be added to the index key */
+            break;
+        default:
+            abort();
+        }
+    }
+
+    /*
+     * populate opcode for table lookup. the byte after the opcode is
+     * always fetched here and handed back below when the matched record
+     * turns out not to use it.
+     */
+    k.mode = mode;
+    c->opc[0] = k.opc[0] = b;
+    nbytes += x86_buffer_read(buf, &b, 1);
+    c->opc[1] = k.opc[1] = b;
+    k.opm[0] = k.opm[1] = 0xff;
+
+    /* if REX.W=1 first attempt to lookup W=1 record */
+    if (w) {
+        r = x86_table_match(ctx, c, k, 1);
+    }
+
+    /* if REX.W=0 or search failed lookup W=0/WIG record */
+    if (!r) {
+        r = x86_table_match(ctx, c, k, 0);
+    }
+
+    /* now attempt lookup without using the prefix */
+    if (!r) {
+        k.enc &= ~x86_enc_p_mask;
+
+        /* if REX.W=1 first attempt to lookup W=1 record */
+        if (w) {
+            r = x86_table_match(ctx, c, k, 1);
+        }
+
+        /* if REX.W=0 or search failed lookup W=0/WIG record */
+        if (!r) {
+            r = x86_table_match(ctx, c, k, 0);
+        }
+    }
+
+    /* parse encoding */
+    if (r) {
+
+        /* set opcode length and modrm flags */
+        switch (x86_enc_func(r->enc)) {
+        case x86_enc_f_modrm_r:
+        case x86_enc_f_modrm_n:
+            /* second byte is modrm */
+            c->flags |= x86_cf_modrm;
+            c->opclen = 1;
+            break;
+        case x86_enc_f_opcode:
+        case x86_enc_f_opcode_r:
+            /* two byte opcode */
+            c->opclen = 2;
+            break;
+        default:
+            /* no second opcode byte */
+            nbytes -= x86_buffer_unread(buf, 1);
+            c->opclen = 1;
+            break;
+        }
+
+        /* parse SIB, disp, imm from format */
+        nbytes += x86_parse_encoding(buf, c, r);
+        if (nbytes <= limit) {
+            c->rec = (r - ctx->idx->map);
+            *len = nbytes;
+            return 0;
+        }
+    }
+
+err:
+    /* hand back everything read so the caller can resynchronise */
+    nbytes -= x86_buffer_unread(buf, nbytes);
+    *len = nbytes;
+    return -1;
+}
+
+/*
+ * gather the register and addressing fields of a decoded instruction
+ * into an x86_operands value.
+ *
+ * the low bits of each field come from ModRM/SIB, or from the low three
+ * bits of an opcode byte when there is no ModRM. the extension bits are
+ * then OR-ed in from whichever of REX, REX2, VEX2, VEX3 or EVEX was
+ * recorded in the codec flags. q.osz starts out as the legacy operand
+ * size prefix flag and is replaced for VEX/EVEX by a test of the
+ * prefix's own two-bit prefix field against x86_pfx_66.
+ */
+static x86_operands x86_codec_operands(x86_ctx *ctx, x86_codec *c)
+{
+    x86_operands q;
+    memset(&q, 0, sizeof(q));
+
+    /* opcode table record selected by x86_codec_read */
+    const x86_opc_data *d = ctx->idx->map + c->rec;
+
+    q.osz = x86_codec_has_osize(c);
+
+    if (x86_codec_has_modrm(c)) {
+        uchar rm = x86_modrm_rm(c->modrm.data[0]);
+        uchar reg = x86_modrm_reg(c->modrm.data[0]);
+        uchar mod = x86_modrm_mod(c->modrm.data[0]);
+
+        /*
+         * q.rm contains unextended value from ModRM.rm
+         * and is used to indicate SIB/disp encoding.
+         *
+         * if SIB present, copy SIB.b into q.b
+         * if SIB not present, copy ModRM.rm into q.b
+         *
+         * q.b contains extended ModRM.rm or SIB.b
+         */
+
+        q.mod = mod;
+        q.rm = rm;
+        q.r = reg;
+
+        switch (mod) {
+        case x86_mod_disp0:
+        case x86_mod_disp8:
+        case x86_mod_dispw:
+            /* the SIB fields are only used outside 16-bit mode */
+            if (!x86_codec_is16(c) && rm == x86_rm_sp_sib) {
+                q.b = x86_sib_b(c->sib.data[0]);
+                q.x = x86_sib_x(c->sib.data[0]);
+                q.s = x86_sib_s(c->sib.data[0]);
+            } else {
+                q.b = q.rm;
+            }
+            break;
+        case x86_mod_reg:
+            q.b = q.rm;
+            break;
+        }
+    /*
+     * NOTE(review): the two tests below use a bitwise AND, while
+     * x86_codec_read compares x86_enc_func(r->enc) against
+     * x86_enc_f_opcode_r by value; confirm these constants are
+     * single-bit flags.
+     */
+    } else if (d->enc & x86_enc_o_opcode_r) {
+        q.b = c->opc[0] & 7;
+    } else if (d->enc & x86_enc_f_opcode_r) {
+        q.b = c->opc[1] & 7;
+    }
+
+    /* the VEX and EVEX cases complement most bits (~) before use */
+    switch (x86_codec_field_ce(c) >> x86_ce_shift) {
+    case x86_ce_rex >> x86_ce_shift:
+        q.b |=  (c->rex.data[0] &    1) << 3; /* [0] -> b[3]*/
+        q.x |=  (c->rex.data[0] &    2) << 2; /* [1] -> x[3]*/
+        q.r |=  (c->rex.data[0] &    4) << 1; /* [2] -> r[3]*/
+        q.w  =  (c->rex.data[0] &    8) >> 3;
+        break;
+    case x86_ce_rex2 >> x86_ce_shift:
+        q.b |=  (c->rex2.data[0] &   1) << 3; /* [0] -> b[3]*/
+        q.x |=  (c->rex2.data[0] &   2) << 2; /* [1] -> x[3]*/
+        q.r |=  (c->rex2.data[0] &   4) << 1; /* [2] -> r[3]*/
+        q.w  =  (c->rex2.data[0] &   8) >> 3;
+        q.b |=  (c->rex2.data[0] &  16) >> 0; /* [4] -> b[4]*/
+        q.x |=  (c->rex2.data[0] &  32) >> 1; /* [5] -> x[4]*/
+        q.r |=  (c->rex2.data[0] &  64) >> 2; /* [6] -> r[4]*/
+        break;
+    case x86_ce_vex2 >> x86_ce_shift:
+        q.r |= (~c->vex2.data[0] & 128) >> 4; /* [7] -> r[3] */
+        q.l  =  (c->vex2.data[0] >>  2) & 1;
+        q.v  = (~c->vex2.data[0] >>  3) & 15;
+        q.osz = (c->vex2.data[0] & 3) == x86_pfx_66;
+        break;
+    case x86_ce_vex3 >> x86_ce_shift:
+        q.b |= (~c->vex3.data[0] &  32) >> 2; /* [5] -> b[3]*/
+        q.x |= (~c->vex3.data[0] &  64) >> 3; /* [6] -> x[3]*/
+        q.r |= (~c->vex3.data[0] & 128) >> 4; /* [7] -> r[3]*/
+        q.l  =  (c->vex3.data[1] >>  2) & 1;
+        q.v  = (~c->vex3.data[1] >>  3) & 15;
+        q.w  =  (c->vex3.data[1] >>  7) & 1;
+        q.osz = (c->vex3.data[1] & 3) == x86_pfx_66;
+        break;
+    case x86_ce_evex >> x86_ce_shift:
+        q.b |= (~c->evex.data[0] &  32) >> 2; /* [5] -> b[3]*/
+        q.x |= (~c->evex.data[0] &  64) >> 3; /* [6] -> x[3]*/
+        q.r |= (~c->evex.data[0] & 128) >> 4; /* [7] -> r[3]*/
+        q.b |=  (c->evex.data[0] &   8) << 1; /* [3] -> b[4]*/
+        q.x |= (~c->evex.data[1] &   4) << 2; /* [2] -> x[4]*/
+        q.r |= (~c->evex.data[0] &  16) >> 0; /* [4] -> r[4]*/
+        q.v  = (~c->evex.data[1] >>  3) & 15;
+        q.v |= (~c->evex.data[2] &   8) << 1; /* [3] -> v[4]*/
+        q.k  =  (c->evex.data[2] >>  0) & 7;
+        q.l  =  (c->evex.data[2] >>  5) & 3;
+        q.brd = (c->evex.data[2] >>  4) & 1;
+        q.osz = (c->evex.data[1] & 3) == x86_pfx_66;
+        break;
+    }
+
+    return q;
+}
+
+/*
+ * disassembly
+ */
+
+/* pack an encoding word, operand descriptor, order and fields into an x86_arg */
+static inline x86_arg x86_codec_meta(uint enc, uint opr, uint ord,
+    x86_operands q)
+{
+    return (x86_arg){ enc, opr, ord, q };
+}
+
+/* address size implied by the decode mode alone: 32, 64, otherwise 16 */
+static uint x86_codec_addr_size(x86_codec *c)
+{
+    /* todo - handle address size prefix */
+    uint addrsz = x86_opr_size_16;
+
+    if (x86_codec_is32(c)) {
+        addrsz = x86_opr_size_32;
+    } else if (x86_codec_is64(c)) {
+        addrsz = x86_opr_size_64;
+    }
+    return addrsz;
+}
+
+/*
+ * pointer size keyword (with trailing space) for an operand size,
+ * or the empty string when the size has no keyword.
+ */
+static const char *x86_ptr_size_str(uint sz)
+{
+    static const struct {
+        uint sz;
+        const char *str;
+    } keywords[] = {
+        { x86_opr_size_8,   "byte ptr " },
+        { x86_opr_size_16,  "word ptr " },
+        { x86_opr_size_32,  "dword ptr " },
+        { x86_opr_size_64,  "qword ptr " },
+        { x86_opr_size_80,  "tbyte ptr " },
+        { x86_opr_size_128, "xmmword ptr " },
+        { x86_opr_size_256, "ymmword ptr " },
+        { x86_opr_size_512, "zmmword ptr " },
+    };
+    size_t i;
+
+    for (i = 0; i < sizeof(keywords) / sizeof(keywords[0]); i++) {
+        if (keywords[i].sz == sz) {
+            return keywords[i].str;
+        }
+    }
+    return "";
+}
+
+/*
+ * effective size of a register or memory operand.
+ *
+ * word-sized operands ('rw', 'mw', moffs and the psi/pdi pointer
+ * operands) take their size from the width field of the encoding, the
+ * decode mode, the operand size flag and the W bit. any other operand
+ * reports the fixed size held in its descriptor, or 0 when it has none.
+ */
+static uint x86_opr_reg_size(x86_codec *c, x86_arg a)
+{
+    const uint type = x86_opr_type_val(a.opr);
+    const uint size = x86_opr_size_val(a.opr);
+    const uint mem = x86_opr_mem_val(a.opr);
+
+    const int by_width =
+        (type == x86_opr_reg && size == x86_opr_size_w) ||
+        mem == x86_opr_mw ||
+        a.opr == x86_opr_moffs ||
+        a.opr == x86_opr_reg_psi ||
+        a.opr == x86_opr_reg_pdi;
+
+    if (by_width) {
+        const uint width = x86_enc_width(a.enc);
+
+        if (width == x86_enc_w_wb) {
+            return x86_opr_size_8;
+        }
+        if (width == x86_enc_w_w0) {
+            return x86_opr_size_32;
+        }
+        if (width == x86_enc_w_w1) {
+            return x86_opr_size_64;
+        }
+        if (width == x86_enc_w_ww || width == x86_enc_w_wx) {
+            if (x86_codec_is16(c)) {
+                return a.q.osz ? x86_opr_size_32 : x86_opr_size_16;
+            }
+            if (x86_codec_is32(c)) {
+                return a.q.osz ? x86_opr_size_16 : x86_opr_size_32;
+            }
+            if (x86_codec_is64(c)) {
+                if (a.q.osz) {
+                    return x86_opr_size_16;
+                }
+                /* 'ww' is 64-bit by default, 'wx' only when W is set */
+                if (width == x86_enc_w_ww) {
+                    return x86_opr_size_64;
+                }
+                return a.q.w ? x86_opr_size_64 : x86_opr_size_32;
+            }
+        }
+    }
+
+    /* the descriptor carries no concrete size */
+    if (size == 0 || size == x86_opr_size_w || size == x86_opr_size_a) {
+        return 0;
+    }
+
+    /* operand contains the register size */
+    return size;
+}
+
+/* memory operand size; a word-sized one is resolved via x86_opr_reg_size */
+static uint x86_opr_ptr_size(x86_codec *c, x86_arg a)
+{
+    const uint memsz = x86_opr_mem_size(a.opr);
+
+    return memsz == x86_opr_size_w ? x86_opr_reg_size(c, a) : memsz;
+}
+
+/*
+ * combine a register number with the register class for the size held
+ * in opr. reg is returned unchanged for sizes other than 8/16/32/64.
+ */
+static uint x86_sized_gpr(x86_codec *c, uint reg, uint opr)
+{
+    const uint num = reg & 31;
+    const uint sz = x86_opr_size_val(opr);
+
+    if (sz == x86_opr_size_8) {
+        /* legacy encoding selects ah/ch/dh/bh instead of spl/bpl/sil/dil */
+        if (x86_codec_field_ce(c) == x86_ce_none && num >= 4 && num < 8) {
+            return x86_reg_bl | num;
+        }
+        return x86_reg_b | num;
+    }
+    if (sz == x86_opr_size_16) {
+        return x86_reg_w | num;
+    }
+    if (sz == x86_opr_size_32) {
+        return x86_reg_d | num;
+    }
+    if (sz == x86_opr_size_64) {
+        return x86_reg_q | num;
+    }
+    return reg;
+}
+
+/*
+ * combine a register number with the vector register class for the size
+ * held in opr. reg is returned unchanged for sizes other than
+ * 64/128/256/512.
+ */
+static uint x86_sized_vec(uint reg, uint opr)
+{
+    const uint sz = x86_opr_size_val(opr);
+
+    if (sz == x86_opr_size_64) {
+        return x86_reg_mmx | (reg & 7);
+    }
+    if (sz == x86_opr_size_128) {
+        return x86_reg_xmm | (reg & 31);
+    }
+    if (sz == x86_opr_size_256) {
+        return x86_reg_ymm | (reg & 31);
+    }
+    if (sz == x86_opr_size_512) {
+        return x86_reg_zmm | (reg & 31);
+    }
+    return reg;
+}
+
+/* byte count for an operand size constant; unlisted sizes count as 1 */
+static uint x86_regsz_bytes(uint regsz)
+{
+    uint nbytes = 1;
+
+    if (regsz == x86_opr_size_16) {
+        nbytes = 2;
+    } else if (regsz == x86_opr_size_32) {
+        nbytes = 4;
+    } else if (regsz == x86_opr_size_64) {
+        nbytes = 8;
+    } else if (regsz == x86_opr_size_128) {
+        nbytes = 16;
+    } else if (regsz == x86_opr_size_256) {
+        nbytes = 32;
+    } else if (regsz == x86_opr_size_512) {
+        nbytes = 64;
+    }
+    return nbytes;
+}
+
+/*
+ * write the name of register number reg, interpreted according to the
+ * operand type in a.opr, followed by " {kN}" when the operand carries
+ * the mask flag and a non-zero mask register is selected.
+ *
+ * returns the length the complete text needs, as snprintf does, which
+ * may be larger than what fit in buf.
+ */
+static size_t x86_opr_intel_reg_str_internal(char *buf, size_t buflen,
+    x86_codec *c, x86_arg a, uint reg)
+{
+    const char *name;
+    size_t len;
+
+    switch (x86_opr_type_val(a.opr)) {
+    case x86_opr_reg:
+        name = x86_reg_name(x86_sized_gpr(c, reg, x86_opr_reg_size(c, a)));
+        break;
+    case x86_opr_vec:
+        name = x86_reg_name(x86_sized_vec(reg, a.opr));
+        break;
+    case x86_opr_k:
+        name = x86_reg_name(x86_reg_kmask | (reg & 7));
+        break;
+    case x86_opr_mmx:
+        name = x86_reg_name(x86_reg_mmx | (reg & 7));
+        break;
+    case x86_opr_st:
+        name = x86_reg_name(x86_reg_fpu | (reg & 7));
+        break;
+    case x86_opr_bnd:
+        name = x86_reg_name(x86_reg_bnd | (reg & 7));
+        break;
+    case x86_opr_seg:
+        name = x86_reg_name(x86_reg_sreg | (reg & 7));
+        break;
+    case x86_opr_creg:
+        name = x86_reg_name(x86_reg_creg | (reg & 15));
+        break;
+    case x86_opr_dreg:
+        name = x86_reg_name(x86_reg_dreg | (reg & 15));
+        break;
+    default:
+        name = "unknown";
+        break;
+    }
+
+    len = snprintf(buf, buflen, "%s", name);
+
+    if ((a.q.k & 7) > 0 && (a.opr & x86_opr_flag_k) != 0) {
+        const char *k = x86_reg_name(x86_reg_kmask | (a.q.k & 7));
+
+        /*
+         * snprintf reports the untruncated length, so len can exceed
+         * buflen. in that case only count the suffix: buflen - len
+         * would wrap and buf + len would point outside the buffer.
+         */
+        if (len < buflen) {
+            len += snprintf(buf + len, buflen - len, " {%s}", k);
+        } else {
+            len += snprintf(NULL, 0, " {%s}", k);
+        }
+    }
+
+    return len;
+}
+
+/* element size of a broadcast memory operand, or 0 if opr has none */
+static uint x86_opr_bcst_size(uint opr)
+{
+    const uint bcst = x86_opr_bcst_val(opr);
+
+    if (bcst == x86_opr_m16bcst) {
+        return x86_opr_size_16;
+    }
+    if (bcst == x86_opr_m32bcst) {
+        return x86_opr_size_32;
+    }
+    if (bcst == x86_opr_m64bcst) {
+        return x86_opr_size_64;
+    }
+    return 0;
+}
+
+/*
+ * intel syntax operand templates with hexadecimal numbers. the leading
+ * %s of every ptr_* template takes the pointer size keyword.
+ */
+x86_opr_formats x86_opr_formats_intel_hex =
+{
+    /* plain register and immediates */
+    .reg                = "%s",
+    .imm32              = "%s0x%x",
+    .imm64              = "%s0x%llx",
+
+    /* absolute addresses */
+    .ptr_imm32          = "%s[%s0x%x]",
+    .ptr_imm64          = "%s[%s0x%llx]",
+    .ptr_disp           = "%s[%s0x%x]",
+
+    /* rip relative */
+    .ptr_rip            = "%s[rip]",
+    .ptr_rip_disp       = "%s[rip %s 0x%x]",
+
+    /* base register */
+    .ptr_reg            = "%s[%s]",
+    .ptr_reg_disp       = "%s[%s %s 0x%x]",
+
+    /* index register, with or without base and scale */
+    .ptr_sreg           = "%s[%d*%s]",
+    .ptr_reg_reg        = "%s[%s + %s]",
+    .ptr_reg_reg_disp   = "%s[%s + %s %s 0x%x]",
+    .ptr_reg_sreg       = "%s[%s + %d*%s]",
+    .ptr_reg_sreg_disp  = "%s[%s + %d*%s %s 0x%x]",
+};
+
+/*
+ * intel syntax operand templates with decimal numbers. the leading
+ * %s of every ptr_* template takes the pointer size keyword.
+ */
+x86_opr_formats x86_opr_formats_intel_dec =
+{
+    /* plain register and immediates */
+    .reg                = "%s",
+    .imm32              = "%s%u",
+    .imm64              = "%s%llu",
+
+    /* absolute addresses */
+    .ptr_imm32          = "%s[%s%u]",
+    .ptr_imm64          = "%s[%s%llu]",
+    .ptr_disp           = "%s[%s%u]",
+
+    /* rip relative */
+    .ptr_rip            = "%s[rip]",
+    .ptr_rip_disp       = "%s[rip %s %u]",
+
+    /* base register */
+    .ptr_reg            = "%s[%s]",
+    .ptr_reg_disp       = "%s[%s %s %u]",
+
+    /* index register, with or without base and scale */
+    .ptr_sreg           = "%s[%d*%s]",
+    .ptr_reg_reg        = "%s[%s + %s]",
+    .ptr_reg_reg_disp   = "%s[%s + %s %s %u]",
+    .ptr_reg_sreg       = "%s[%s + %d*%s]",
+    .ptr_reg_sreg_disp  = "%s[%s + %d*%s %s %u]",
+};
+
+/*
+ * format a ModRM r/m operand using the templates in fmt.
+ *
+ * a register-direct operand is printed as a register. a memory operand
+ * is classified from the mod/rm/SIB fields in a.q and the displacement
+ * in c->disp32, and printed with the first matching template, tested in
+ * this order: rip relative, bare displacement, scaled index without
+ * base, base + scaled index, base + index, base with or without
+ * displacement. a " {kN}" mask suffix and a "{1toN}" broadcast suffix
+ * are appended where the operand asks for them.
+ *
+ * returns the length the complete text needs, as snprintf does, which
+ * may be larger than what fit in buf.
+ */
+static size_t x86_opr_intel_mrm_str_internal(char *buf, size_t buflen,
+    x86_codec *c, x86_arg a, x86_opr_formats *fmt)
+{
+    uint regsz = x86_opr_reg_size(c, a);
+    uint ptrsz = x86_opr_ptr_size(c, a);
+    uint addrsz = x86_codec_addr_size(c);
+    uint bcstsz = x86_opr_bcst_size(a.opr);
+    uint oprmem = x86_opr_mem_val(a.opr);
+    uint vmsz = x86_opr_ew_size(a.opr);
+    size_t len = 0;
+
+    int is_disp = c->disp32 != 0;
+    int is_scale = a.q.s != 0;
+    int is_sib = a.q.rm == x86_sp;
+    int is_vsib = oprmem == x86_opr_vm32 || oprmem == x86_opr_vm64;
+    int is_reg = a.q.mod == x86_mod_reg;
+    int is_disp0 = a.q.mod == x86_mod_disp0;
+    int is_disp8 = a.q.mod == x86_mod_disp8;
+    int is_base_bp = (a.q.b & 7) == x86_bp;
+    int is_base_sp = (a.q.b & 7) == x86_sp;
+    int is_index_sp = a.q.x == x86_sp;
+    int is_64bit = x86_codec_is64(c);
+    int is_evex = x86_codec_field_ce(c) == x86_ce_evex;
+
+    int is_ptr_rip = is_base_bp && is_disp0 && !is_sib && is_64bit;
+    int is_ptr_rip_disp = is_ptr_rip && is_disp;
+    int is_ptr_disp = is_base_bp && is_disp0 && (!is_sib ||
+                      (is_sib && !is_vsib && is_index_sp && !is_scale));
+    int is_ptr_sreg = is_sib && is_base_bp && is_disp0;
+    int is_ptr_reg_sreg = is_sib && is_scale;
+    int is_ptr_reg_sreg_disp = is_ptr_reg_sreg && is_disp;
+    int is_ptr_reg_reg = is_sib && !(is_base_sp && is_index_sp && !is_vsib);
+    int is_ptr_reg_reg_disp = is_ptr_reg_reg && is_disp;
+
+    int s = (1 << a.q.s);
+
+    /*
+     * magnitude of the displacement. negating in unsigned arithmetic
+     * keeps the most negative value well defined, which -c->disp32
+     * does not.
+     */
+    uint d = c->disp32 < 0 ? 0u - (uint)c->disp32 : (uint)c->disp32;
+
+    const char *so = c->disp32 < 0 ? "-" : "+";
+    const char *sn = c->disp32 < 0 ? "-" : "";
+    const char *p = is_vsib ? x86_ptr_size_str(vmsz) : x86_ptr_size_str(ptrsz);
+    const char *b = x86_reg_name(x86_sized_gpr(c, a.q.b, addrsz));
+    const char *x = is_vsib ? x86_reg_name(x86_sized_vec(a.q.x, regsz)) :
+        is_index_sp ? "riz" : x86_reg_name(x86_sized_gpr(c, a.q.x, addrsz));
+
+    /* EVEX stores disp8 scaled down by the element or operand size */
+    if (is_disp8 && is_evex) {
+        d *= x86_opr_et_val(a.opr) ?
+            x86_opr_ew_bytes(a.opr) : x86_regsz_bytes(ptrsz);
+    }
+
+    if (is_reg) {
+        len = x86_opr_intel_reg_str_internal(buf, buflen, c, a, a.q.b);
+    } else if (is_ptr_rip_disp) {
+        len = snprintf(buf, buflen, fmt->ptr_rip_disp, p, so, d);
+    } else if (is_ptr_rip) {
+        len = snprintf(buf, buflen, fmt->ptr_rip, p);
+    } else if (is_ptr_disp) {
+        len = snprintf(buf, buflen, fmt->ptr_disp, p, sn, d);
+    } else if (is_ptr_sreg) {
+        len = snprintf(buf, buflen, fmt->ptr_sreg, p, s, x);
+    } else if (is_ptr_reg_sreg_disp) {
+        len = snprintf(buf, buflen, fmt->ptr_reg_sreg_disp, p, b, s, x, so, d);
+    } else if (is_ptr_reg_sreg) {
+        len = snprintf(buf, buflen, fmt->ptr_reg_sreg, p, b, s, x);
+    } else if (is_ptr_reg_reg_disp) {
+        len = snprintf(buf, buflen, fmt->ptr_reg_reg_disp, p, b, x, so, d);
+    } else if (is_ptr_reg_reg) {
+        len = snprintf(buf, buflen, fmt->ptr_reg_reg, p, b, x);
+    } else if (is_disp) {
+        len = snprintf(buf, buflen, fmt->ptr_reg_disp, p, b, so, d);
+    } else {
+        len = snprintf(buf, buflen, fmt->ptr_reg, p, b);
+    }
+
+    /*
+     * snprintf reports the untruncated length, so len can exceed buflen.
+     * the suffixes below are then only counted: buflen - len would wrap
+     * and buf + len would point outside the buffer.
+     */
+    if (!is_reg && (a.q.k & 7) > 0 && (a.opr & x86_opr_flag_k) != 0) {
+        const char *k = x86_reg_name(x86_reg_kmask | (a.q.k & 7));
+        if (len < buflen) {
+            len += snprintf(buf + len, buflen - len, " {%s}", k);
+        } else {
+            len += snprintf(NULL, 0, " {%s}", k);
+        }
+    }
+
+    if (bcstsz && a.q.brd) {
+        uint bcstsc = x86_regsz_bytes(ptrsz) / x86_regsz_bytes(bcstsz);
+        if (len < buflen) {
+            len += snprintf(buf + len, buflen - len, "{1to%u}", bcstsc);
+        } else {
+            len += snprintf(NULL, 0, "{1to%u}", bcstsc);
+        }
+    }
+
+    return len;
+}
+
+/* format a ModRM r/m operand using the decimal templates */
+static size_t x86_opr_intel_mrm_dec_str(char *buf, size_t buflen,
+    x86_codec *c, x86_arg a)
+{
+    x86_opr_formats *fmt = &x86_opr_formats_intel_dec;
+
+    return x86_opr_intel_mrm_str_internal(buf, buflen, c, a, fmt);
+}
+
+/* format a ModRM r/m operand using the hexadecimal templates */
+static size_t x86_opr_intel_mrm_hex_str(char *buf, size_t buflen,
+    x86_codec *c, x86_arg a)
+{
+    x86_opr_formats *fmt = &x86_opr_formats_intel_hex;
+
+    return x86_opr_intel_mrm_str_internal(buf, buflen, c, a, fmt);
+}
+
+/* format the register selected by the r field of the operands */
+static size_t x86_opr_intel_reg_str(char *buf, size_t buflen,
+    x86_codec *c, x86_arg a)
+{
+    const uint reg = a.q.r;
+
+    return x86_opr_intel_reg_str_internal(buf, buflen, c, a, reg);
+}
+
+/* format the register selected by the v field of the operands */
+static size_t x86_opr_intel_vec_str(char *buf, size_t buflen,
+    x86_codec *c, x86_arg a)
+{
+    const uint reg = a.q.v;
+
+    return x86_opr_intel_reg_str_internal(buf, buflen, c, a, reg);
+}
+
+/* format the register selected by the b field of the operands */
+static size_t x86_opr_intel_opb_str(char *buf, size_t buflen,
+    x86_codec *c, x86_arg a)
+{
+    const uint reg = a.q.b;
+
+    return x86_opr_intel_reg_str_internal(buf, buflen, c, a, reg);
+}
+
+/* format the register numbered by bits 7:4 of the immediate */
+static size_t x86_opr_intel_is4_str(char *buf, size_t buflen,
+    x86_codec *c, x86_arg a)
+{
+    return x86_opr_intel_reg_str_internal(buf, buflen, c, a,
+        (c->imm32 >> 4) & 15);
+}
+
+/*
+ * format the immediate of an instruction using the templates in fmt.
+ *
+ * the value is printed as an optional "-" followed by its magnitude.
+ * a moffs operand is wrapped as a memory reference with a pointer size
+ * keyword. the 64-bit templates are used when the codec flags record
+ * a 64-bit immediate, the 32-bit ones otherwise.
+ *
+ * returns the length the text needs, as snprintf does.
+ */
+static size_t x86_opr_intel_imm_str_internal(char *buf, size_t buflen,
+    x86_codec *c, x86_arg a, x86_opr_formats *fmt)
+{
+    const int is_moffs = a.opr == x86_opr_moffs;
+    const int is_imm64 = x86_codec_field_ci(c) == x86_ci_i64;
+    const char *ptr = is_moffs ?
+        x86_ptr_size_str(x86_opr_reg_size(c, a)) : "";
+
+    /*
+     * magnitudes are computed in unsigned arithmetic: negating the most
+     * negative signed value is undefined, and the templates print with
+     * unsigned conversions.
+     */
+    if (is_imm64) {
+        llong imm = c->imm64;
+        const char *sign = imm < 0 ? "-" : "";
+        unsigned long long mag = imm < 0 ?
+            0ull - (unsigned long long)imm : (unsigned long long)imm;
+
+        if (is_moffs) {
+            return snprintf(buf, buflen, fmt->ptr_imm64, ptr, sign, mag);
+        }
+        return snprintf(buf, buflen, fmt->imm64, sign, mag);
+    } else {
+        int imm = c->imm32;
+        const char *sign = imm < 0 ? "-" : "";
+        unsigned int mag = imm < 0 ?
+            0u - (unsigned int)imm : (unsigned int)imm;
+
+        if (is_moffs) {
+            return snprintf(buf, buflen, fmt->ptr_imm32, ptr, sign, mag);
+        }
+        return snprintf(buf, buflen, fmt->imm32, sign, mag);
+    }
+}
+
+/* format the immediate using the hexadecimal templates */
+static size_t x86_opr_intel_imm_hex_str(char *buf, size_t buflen,
+    x86_codec *c, x86_arg a)
+{
+    x86_opr_formats *fmt = &x86_opr_formats_intel_hex;
+
+    return x86_opr_intel_imm_str_internal(buf, buflen, c, a, fmt);
+}
+
+/* format the immediate using the decimal templates */
+static size_t x86_opr_intel_imm_dec_str(char *buf, size_t buflen,
+    x86_codec *c, x86_arg a)
+{
+    x86_opr_formats *fmt = &x86_opr_formats_intel_dec;
+
+    return x86_opr_intel_imm_str_internal(buf, buflen, c, a, fmt);
+}
+
+/* format the second immediate as an optional "-" and a hex magnitude */
+static size_t x86_opr_intel_ime_hex_str(char *buf, size_t buflen,
+    x86_codec *c, x86_arg a)
+{
+    int imm = c->imm2;
+    const char *sign = "";
+
+    if (imm < 0) {
+        sign = "-";
+        imm = -imm;
+    }
+    return snprintf(buf, buflen, "%s0x%x", sign, imm);
+}
+
+/* format the second immediate as an optional "-" and a decimal magnitude */
+static size_t x86_opr_intel_ime_dec_str(char *buf, size_t buflen,
+    x86_codec *c, x86_arg a)
+{
+    int imm = c->imm2;
+    const char *sign = "";
+
+    if (imm < 0) {
+        sign = "-";
+        imm = -imm;
+    }
+    return snprintf(buf, buflen, "%s%u", sign, imm);
+}
+
+/*
+ * format a relative operand: the immediate in hexadecimal, followed by
+ * whatever the optional symbol callback appends for pc_offset.
+ *
+ * the callback is skipped when the immediate already overflowed buf.
+ * snprintf reports the untruncated length, so buflen - len would wrap
+ * and buf + len would point outside the buffer.
+ */
+static size_t x86_opr_intel_rel_hex_str(char *buf, size_t buflen,
+    x86_codec *c, x86_arg a, size_t pc_offset, x86_fmt_symbol sym_cb)
+{
+    size_t len = x86_opr_intel_imm_str_internal(buf, buflen, c, a,
+        &x86_opr_formats_intel_hex);
+    if (sym_cb && len < buflen) {
+        len += sym_cb(buf + len, buflen - len, c, pc_offset);
+    }
+    return len;
+}
+
+/*
+ * format a relative operand: the immediate in decimal, followed by
+ * whatever the optional symbol callback appends for pc_offset.
+ *
+ * the callback is skipped when the immediate already overflowed buf.
+ * snprintf reports the untruncated length, so buflen - len would wrap
+ * and buf + len would point outside the buffer.
+ */
+static size_t x86_opr_intel_rel_dec_str(char *buf, size_t buflen,
+    x86_codec *c, x86_arg a, size_t pc_offset, x86_fmt_symbol sym_cb)
+{
+    size_t len = x86_opr_intel_imm_str_internal(buf, buflen, c, a,
+        &x86_opr_formats_intel_dec);
+    if (sym_cb && len < buflen) {
+        len += sym_cb(buf + len, buflen - len, c, pc_offset);
+    }
+    return len;
+}
+
+/*
+ * register implied by a fixed-register operand, or -1 (converted to
+ * uint) when the operand does not name one.
+ *
+ * the aw/cw/dw/bw operands are sized by the operand size and the
+ * pa/pc/pd/pb operands by the address size.
+ */
+static uint x86_opr_intel_const_reg(x86_codec *c,
+    x86_arg a)
+{
+    uint base;
+    int by_addr;
+
+    /* operands that name one specific register */
+    switch (a.opr) {
+    case x86_opr_reg_al: return x86_al;
+    case x86_opr_reg_cl: return x86_cl;
+    case x86_opr_reg_ax: return x86_ax;
+    case x86_opr_reg_cx: return x86_cx;
+    case x86_opr_reg_dx: return x86_dx;
+    case x86_opr_reg_bx: return x86_bx;
+    case x86_opr_reg_eax: return x86_eax;
+    case x86_opr_reg_ecx: return x86_ecx;
+    case x86_opr_reg_edx: return x86_edx;
+    case x86_opr_reg_ebx: return x86_ebx;
+    case x86_opr_reg_rax: return x86_rax;
+    case x86_opr_reg_rcx: return x86_rcx;
+    case x86_opr_reg_rdx: return x86_rdx;
+    case x86_opr_reg_rbx: return x86_rbx;
+    default: break;
+    }
+
+    /* operands whose register width depends on the instruction */
+    switch (a.opr) {
+    case x86_opr_reg_aw: base = x86_al; by_addr = 0; break;
+    case x86_opr_reg_cw: base = x86_cl; by_addr = 0; break;
+    case x86_opr_reg_dw: base = x86_dl; by_addr = 0; break;
+    case x86_opr_reg_bw: base = x86_bl; by_addr = 0; break;
+    case x86_opr_reg_pa: base = x86_al; by_addr = 1; break;
+    case x86_opr_reg_pc: base = x86_cl; by_addr = 1; break;
+    case x86_opr_reg_pd: base = x86_dl; by_addr = 1; break;
+    case x86_opr_reg_pb: base = x86_bl; by_addr = 1; break;
+    default: return -1;
+    }
+
+    if (by_addr) {
+        return x86_sized_gpr(c, base, x86_codec_addr_size(c));
+    }
+    return x86_sized_gpr(c, base, x86_opr_reg_size(c, a));
+}
+
+/*
+ * fmt_const callback: prints an operand that is fixed by the opcode.
+ *
+ * Operands that x86_opr_intel_const_reg() resolves are printed by register
+ * name. The remaining kinds are printed as literal text ("1", "xmm0",
+ * "xmm0_7", "fs", "gs", "st"), or for psi/pdi as "<ptr size>[<reg>]"
+ * where the register is derived from x86_sil / x86_dil at the address
+ * size. Anything else prints "unknown".
+ */
+static size_t x86_opr_intel_const_str(char *buf, size_t buflen, x86_codec *c,
+    x86_arg a)
+{
+    uint regsz = x86_opr_reg_size(c, a);
+    uint addrsz = x86_codec_addr_size(c);
+    int regname = x86_opr_intel_const_reg(c, a);
+    const char *text = "unknown";
+
+    if (regname >= 0) {
+        return snprintf(buf, buflen, "%s", x86_reg_name(regname));
+    }
+
+    switch (a.opr) {
+    case x86_opr_1:
+        text = "1";
+        break;
+    case x86_opr_reg_xmm0:
+        text = "xmm0";
+        break;
+    case x86_opr_reg_xmm0_7:
+        text = "xmm0_7";
+        break;
+    case x86_opr_seg_fs:
+        text = "fs";
+        break;
+    case x86_opr_seg_gs:
+        text = "gs";
+        break;
+    case x86_opr_reg_st0:
+        text = "st";
+        break;
+    case x86_opr_reg_psi:
+    case x86_opr_reg_pdi:
+        /* memory operand addressed through the si/di register */
+        return snprintf(buf, buflen, "%s[%s]",
+            x86_ptr_size_str(regsz),
+            x86_reg_name(x86_sized_gpr(c,
+                a.opr == x86_opr_reg_psi ? x86_sil : x86_dil, addrsz)));
+    default:
+        break;
+    }
+    return snprintf(buf, buflen, "%s", text);
+}
+
+/*
+ * Intel-syntax operand formatter whose imm, mrm, ime and rel callbacks are
+ * the hexadecimal variants; the remaining callbacks are shared with the
+ * decimal formatter.
+ */
+x86_opr_formatter x86_format_intel_hex = {
+    .fmt_const = x86_opr_intel_const_str,
+    .fmt_imm   = x86_opr_intel_imm_hex_str,
+    .fmt_reg   = x86_opr_intel_reg_str,
+    .fmt_mrm   = x86_opr_intel_mrm_hex_str,
+    .fmt_vec   = x86_opr_intel_vec_str,
+    .fmt_opb   = x86_opr_intel_opb_str,
+    .fmt_is4   = x86_opr_intel_is4_str,
+    .fmt_ime   = x86_opr_intel_ime_hex_str,
+    .fmt_rel   = x86_opr_intel_rel_hex_str,
+};
+
+/*
+ * Intel-syntax operand formatter whose imm, mrm, ime and rel callbacks are
+ * the decimal variants; the remaining callbacks are shared with the
+ * hexadecimal formatter.
+ */
+x86_opr_formatter x86_format_intel_dec = {
+    .fmt_const = x86_opr_intel_const_str,
+    .fmt_imm   = x86_opr_intel_imm_dec_str,
+    .fmt_reg   = x86_opr_intel_reg_str,
+    .fmt_mrm   = x86_opr_intel_mrm_dec_str,
+    .fmt_vec   = x86_opr_intel_vec_str,
+    .fmt_opb   = x86_opr_intel_opb_str,
+    .fmt_is4   = x86_opr_intel_is4_str,
+    .fmt_ime   = x86_opr_intel_ime_dec_str,
+    .fmt_rel   = x86_opr_intel_rel_dec_str,
+};
+
+/*
+ * Format a single operand by dispatching on its ordinal type to the
+ * matching callback of 'fmt'.
+ *
+ * Immediate operands are refined further: rel8/relw operands go to
+ * fmt_rel (which also receives pc_offset and sym_cb), is4 and ime
+ * ordinals go to fmt_is4 / fmt_ime, and everything else to fmt_imm.
+ *
+ * Returns the callback's result, or 0 for an unrecognised ordinal type.
+ */
+static size_t x86_format_operand(char *buf, size_t buflen, x86_codec *c,
+    x86_arg a, size_t pc_offset, x86_fmt_symbol sym_cb, x86_opr_formatter *fmt)
+{
+    switch (x86_ord_type_val(a.ord)) {
+    case x86_ord_const:
+        return fmt->fmt_const(buf, buflen, c, a);
+    case x86_ord_reg:
+        return fmt->fmt_reg(buf, buflen, c, a);
+    case x86_ord_mrm:
+        return fmt->fmt_mrm(buf, buflen, c, a);
+    case x86_ord_vec:
+        return fmt->fmt_vec(buf, buflen, c, a);
+    case x86_ord_opr:
+        return fmt->fmt_opb(buf, buflen, c, a);
+    case x86_ord_imm:
+        break; /* handled below */
+    default:
+        return 0;
+    }
+
+    /* immediate operand: pick the specialised callback */
+    if (a.opr == x86_opr_rel8 || a.opr == x86_opr_relw) {
+        return fmt->fmt_rel(buf, buflen, c, a, pc_offset, sym_cb);
+    }
+    if ((a.ord & ~x86_ord_flag_mask) == x86_ord_is4) {
+        return fmt->fmt_is4(buf, buflen, c, a);
+    }
+    if ((a.ord & ~x86_ord_flag_mask) == x86_ord_ime) {
+        return fmt->fmt_ime(buf, buflen, c, a);
+    }
+    return fmt->fmt_imm(buf, buflen, c, a);
+}
+
+/*
+ * Format one decoded instruction as text: optional lock/rep/repne/wait
+ * keywords, the mnemonic, then the operands separated by ", " (the first
+ * operand is preceded by a tab). Operands are always rendered with the
+ * decimal Intel formatter; pc_offset and sym_cb are forwarded to the
+ * operand formatter, which passes them to the relative-operand callback.
+ *
+ * Returns the number of characters produced. Formatting stops as soon as
+ * the buffer is full, so after truncation the result is not necessarily
+ * the complete untruncated length.
+ */
+static size_t x86_format_op_internal(char *buf, size_t buflen, x86_ctx *ctx,
+    x86_codec *c, size_t pc_offset, x86_fmt_symbol sym_cb)
+{
+    const x86_opc_data *d = ctx->idx->map + c->rec;
+    const x86_opr_data *o = x86_opr_table + d->opr;
+    const x86_ord_data *s = x86_ord_table + d->ord;
+
+    x86_operands q = x86_codec_operands(ctx, c);
+
+    uint prefix = d->enc & x86_enc_p_mask;
+    size_t len;
+
+    /*
+     * rep/repne/wait are suppressed when the matched opcode record's
+     * encoding already carries that prefix (f3/f2/9b).
+     */
+    const char *lock = x86_codec_has_lock(c) ? "lock " : "";
+    const char *rep = (x86_codec_has_rep(c) && prefix != x86_enc_p_f3)
+        ? "rep " : "";
+    const char *repne = (x86_codec_has_repne(c) && prefix != x86_enc_p_f2)
+        ? "repne " : "";
+    const char *wait = (x86_codec_has_wait(c) && prefix != x86_enc_p_9b)
+        ? "wait " : "";
+
+    /* one call for prefixes + mnemonic: no chaining on a truncated length */
+    len = snprintf(buf, buflen, "%s%s%s%s%s",
+        lock, rep, repne, wait, x86_op_names[d->op]);
+
+    for (size_t i = 0; i < array_size(o->opr) && o->opr[i]; i++) {
+        x86_arg a = x86_codec_meta(d->enc, o->opr[i], s->ord[i], q);
+
+        /*
+         * snprintf-style lengths can reach or exceed buflen once output is
+         * truncated; buflen - len must never be allowed to wrap around.
+         */
+        if (len >= buflen) {
+            break;
+        }
+        len += snprintf(buf + len, buflen - len, "%s", i == 0 ? "\t" : ", ");
+        if (len >= buflen) {
+            break;
+        }
+        len += x86_format_operand(buf + len, buflen - len, c, a,
+            pc_offset, sym_cb, &x86_format_intel_dec);
+    }
+
+    return len;
+}
+
+/*
+ * Format a decoded instruction without symbol annotation: calls the
+ * internal formatter with a zero pc_offset and no symbol callback.
+ */
+size_t x86_format_op(char *buf, size_t buflen, x86_ctx *ctx, x86_codec *c)
+{
+    const size_t no_pc_offset = 0;
+
+    return x86_format_op_internal(buf, buflen, ctx, c, no_pc_offset, NULL);
+}
+
+/*
+ * Format a decoded instruction, passing pc_offset and sym_cb through to
+ * the internal formatter unchanged.
+ *
+ * note: caller needs to add instruction length to pc_offset
+ */
+size_t x86_format_op_symbol(char *buf, size_t buflen, x86_ctx *ctx,
+    x86_codec *c, size_t pc_offset, x86_fmt_symbol sym_cb)
+{
+    size_t produced;
+
+    produced = x86_format_op_internal(buf, buflen, ctx, c, pc_offset, sym_cb);
+    return produced;
+}
+
+/*
+ * Write a hex dump of an instruction's bytes into buf.
+ *
+ * At most 11 bytes are printed as two lowercase hex digits each; the
+ * first is preceded by a tab and the following ones by a space. The dump
+ * is then padded with (40 - 3 * datalen) / 8 tabs when datalen < 10, or
+ * with a single tab otherwise.
+ *
+ * Returns the number of characters produced. Output stops once the buffer
+ * is full, so the function never writes beyond buflen bytes.
+ */
+size_t x86_format_hex(char *buf, size_t buflen, uchar *data, size_t datalen)
+{
+    size_t len = 0;
+    size_t tabs = datalen < 10 ? (40 - datalen * 3) / 8 : 1;
+
+    /*
+     * The len < buflen guards keep buflen - len from wrapping around once
+     * snprintf reports a truncated (would-be) length.
+     */
+    for (size_t i = 0; i < datalen && i < 11 && len < buflen; i++) {
+        len += snprintf(buf + len, buflen - len, "%s%02hhx",
+            i == 0 ? "\t" : " ", data[i]);
+    }
+    for (size_t i = 0; i < tabs && len < buflen; i++) {
+        len += snprintf(buf + len, buflen - len, "\t");
+    }
+    return len;
+}
+
+/*
+ * context
+ */
+
+/*
+ * Allocate a context for the given mode and attach the table index that
+ * x86_table_build() produces for that mode. Release the result with
+ * x86_ctx_destroy().
+ */
+x86_ctx *x86_ctx_create(uint mode)
+{
+    x86_ctx *ctx;
+
+    ctx = g_malloc(sizeof(*ctx));
+    ctx->idx = x86_table_build(mode);
+    ctx->mode = mode;
+    return ctx;
+}
+
+/*
+ * Release a context created by x86_ctx_create(). Passing NULL is a no-op,
+ * mirroring g_free().
+ *
+ * NOTE(review): the page_offsets, map and acc members of ctx->idx are
+ * freed but ctx->idx itself is not; whether that is a leak depends on how
+ * x86_table_build() allocates the index - confirm.
+ */
+void x86_ctx_destroy(x86_ctx *ctx)
+{
+    if (!ctx) {
+        return;
+    }
+    if (ctx->idx) {
+        g_free(ctx->idx->page_offsets);
+        g_free(ctx->idx->map);
+        g_free(ctx->idx->acc);
+    }
+    g_free(ctx);
+}
diff --git a/disas/x86-disas.c b/disas/x86-disas.c
new file mode 100644
index 000000000000..b4f778e6ddbb
--- /dev/null
+++ b/disas/x86-disas.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2024-2025 Michael Clark
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "disas/dis-asm.h"
+#include "disas/x86.h"
+
+/*
+ * Write the instruction bytes as space-separated two-digit hex values and
+ * pad with spaces.
+ *
+ * For 1..hexcols bytes the padding is chosen so that the total width is
+ * the same regardless of the byte count; for any other count a single
+ * space is appended. The result is always NUL-terminated when buflen > 0.
+ *
+ * Returns the length of the string actually stored in buf (excluding the
+ * terminator), so the caller can safely continue writing at buf + result.
+ */
+static size_t format_hex(char *buf, size_t buflen, uchar *data, size_t nbytes)
+{
+    const size_t hexcols = 10;
+    size_t len = 0;
+    size_t indent = 1;
+
+    if (buflen == 0) {
+        return 0;
+    }
+
+    /* len < buflen keeps buflen - len from wrapping after truncation */
+    for (size_t i = 0; i < nbytes && len < buflen; i++) {
+        len += snprintf(buf + len, buflen - len, "%s%02x",
+            i == 0 ? "" : " ", data[i]);
+    }
+    if (len >= buflen) {
+        /* truncated: snprintf already terminated the buffer at its end */
+        return buflen - 1;
+    }
+
+    if (nbytes >= 1 && nbytes <= hexcols) {
+        indent = (hexcols - nbytes) * 3 + 8 - (hexcols * 3) % 8;
+    }
+    for (size_t i = 0; i < indent && len < (buflen - 1); i++) {
+        buf[len++] = ' ';
+    }
+    buf[len] = '\0';
+
+    return len;
+}
+
+/*
+ * Symbol callback handed to x86_format_op_symbol(): appends " # 0x<addr>"
+ * where addr is pc_offset + c->imm32.
+ * NOTE(review): the sum is computed in size_t before widening, so the
+ * printed address is limited to the host's size_t width - confirm this is
+ * acceptable for 64-bit targets on 32-bit hosts.
+ */
+static size_t format_symbol(char *buf, size_t buflen, x86_codec *c,
+    size_t pc_offset)
+{
+    ullong target = pc_offset + c->imm32;
+
+    return snprintf(buf, buflen, " # 0x%llx", target);
+}
+
+/*
+ * print_insn callback: fetch, decode and print one x86 instruction.
+ *
+ * Reads up to 16 bytes starting at memaddr (limited by the distance to
+ * the end of the buffer described by info), decodes one instruction with
+ * the x86_ctx stored in info->private_data, and prints the raw bytes in
+ * hex followed by the formatted instruction through info->fprintf_func.
+ *
+ * Returns the number of bytes decoded, or -1 if the context is missing,
+ * the memory read fails, or the bytes do not decode.
+ */
+int print_insn_x86(bfd_vma memaddr, struct disassemble_info *info)
+{
+    enum { max_fetch_len = 16 };
+
+    x86_buffer buf;
+    x86_codec codec;
+    x86_ctx *ctx = (x86_ctx *)info->private_data;
+    bfd_byte packet[max_fetch_len]; /* fixed size: no need for alloca */
+    size_t nfetch, ndecode, len;
+    char str[128];
+    int ret;
+
+    if (!ctx) {
+        return -1;
+    }
+
+    /* read instruction */
+    nfetch = info->buffer_vma + info->buffer_length - memaddr;
+    if (nfetch > sizeof(packet)) {
+        nfetch = sizeof(packet);
+    }
+    ret = (*info->read_memory_func)(memaddr, packet, nfetch, info);
+    if (ret != 0) {
+        (*info->memory_error_func)(ret, memaddr, info);
+        /*
+         * The read status may be a positive error code; returning it
+         * would be indistinguishable from an instruction length.
+         */
+        return -1;
+    }
+
+    /* decode instruction */
+    x86_buffer_init_ex(&buf, packet, 0, nfetch);
+    ret = x86_codec_read(ctx, &buf, &codec, &ndecode);
+    if (ret != 0) {
+        return -1;
+    }
+
+    /* format instruction: hex dump first, then the disassembly after it */
+    len = format_hex(str, sizeof(str), packet, ndecode);
+    x86_format_op_symbol(str + len, sizeof(str) - len, ctx, &codec,
+        memaddr + ndecode, format_symbol);
+    (*info->fprintf_func)(info->stream, "%s", str);
+
+    return ndecode;
+}
diff --git a/disas/x86.h b/disas/x86.h
new file mode 100644
index 000000000000..2796095b1afc
--- /dev/null
+++ b/disas/x86.h
@@ -0,0 +1,1860 @@
+/*
+ * Copyright (c) 2024-2025 Michael Clark
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#pragma once
+
+#include <stddef.h>
+#include <string.h>
+
+#include "qemu/bswap.h"
+
+/*
+ * types
+ */
+
+/* short names for the unsigned and wide basic types */
+typedef unsigned char uchar;
+typedef unsigned short ushort;
+typedef unsigned int uint;
+typedef unsigned long ulong;
+typedef unsigned long long ullong;
+typedef long long llong;
+
+/*
+ * signed integers; i8 is spelled 'signed char' because the signedness of
+ * plain 'char' is implementation-defined and it is unsigned on some hosts
+ */
+typedef signed char i8;
+typedef short i16;
+typedef int i32;
+typedef llong i64;
+
+/* unsigned integers */
+typedef uchar u8;
+typedef ushort u16;
+typedef uint u32;
+typedef ullong u64;
+
+/*
+ * x86 forward declarations
+ */
+
+typedef struct x86_rex x86_rex;
+typedef struct x86_rex2 x86_rex2;
+typedef struct x86_vex2 x86_vex2;
+typedef struct x86_vex3 x86_vex3;
+typedef struct x86_evex x86_evex;
+typedef struct x86_modrm x86_modrm;
+typedef struct x86_sib x86_sib;
+
+typedef struct x86_mem x86_mem;
+typedef struct x86_codec x86_codec;
+typedef struct x86_modeb x86_modeb;
+typedef struct x86_opc_data x86_opc_data;
+typedef struct x86_opr_data x86_opr_data;
+typedef struct x86_ord_data x86_ord_data;
+typedef struct x86_table_idx x86_table_idx;
+typedef struct x86_acc_idx x86_acc_idx;
+typedef struct x86_acc_entry x86_acc_entry;
+typedef struct x86_buffer x86_buffer;
+typedef struct x86_ctx x86_ctx;
+
+/*
+ * simple debug macros
+ *
+ * Both macros print only when a variable named 'debug', which must be in
+ * scope at the point of use, is non-zero. x86_debugf additionally prefixes
+ * the message with the calling function's name. VA_ARGS relies on the GNU
+ * ', ##__VA_ARGS__' extension to drop the comma when no arguments follow.
+ *
+ * The bodies are wrapped in do { } while (0) so that each macro expands
+ * to a single statement and cannot capture a following 'else'.
+ */
+
+#define VA_ARGS(...) , ##__VA_ARGS__
+#define x86_debug(fmt, ...) do { if (debug) { \
+    printf(fmt "\n" VA_ARGS(__VA_ARGS__)); } } while (0)
+#define x86_debugf(fmt, ...) do { if (debug) { \
+    printf("%s: " fmt "\n", __func__ VA_ARGS(__VA_ARGS__)); } } while (0)
+
+/*
+ * register groups
+ */
+
+enum
+{
+    x86_reg_w     = (0 << 5),
+    x86_reg_b     = (1 << 5),
+    x86_reg_d     = (2 << 5),
+    x86_reg_q     = (3 << 5),
+
+    x86_reg_mmx   = (4 << 5),
+    x86_reg_xmm   = (5 << 5),
+    x86_reg_ymm   = (6 << 5),
+    x86_reg_zmm   = (7 << 5),
+
+    x86_reg_kmask = (8 << 5),
+    x86_reg_bl    = (9 << 5),
+    x86_reg_fpu   = (10 << 5),
+    x86_reg_bnd   = (11 << 5),
+
+    x86_reg_dreg  = (12 << 5),
+    x86_reg_creg  = (13 << 5),
+    x86_reg_sreg  = (14 << 5),
+    x86_reg_sys   = (15 << 5),
+};
+
+/*
+ * prefix byte
+ */
+
+enum
+{
+    x86_pb_26    = 0x26,
+    x86_pb_2e    = 0x2e,
+    x86_pb_36    = 0x36,
+    x86_pb_3e    = 0x3e,
+    x86_pb_41    = 0x41,
+    x86_pb_42    = 0x42,
+    x86_pb_44    = 0x44,
+    x86_pb_48    = 0x48,
+    x86_pb_62    = 0x62,
+    x86_pb_64    = 0x64,
+    x86_pb_65    = 0x65,
+    x86_pb_66    = 0x66,
+    x86_pb_67    = 0x67,
+    x86_pb_9b    = 0x9b,
+    x86_pb_c4    = 0xc4,
+    x86_pb_c5    = 0xc5,
+    x86_pb_d5    = 0xd5,
+    x86_pb_f0    = 0xf0,
+    x86_pb_f2    = 0xf2,
+    x86_pb_f3    = 0xf3,
+
+    x86_pb_es    = x86_pb_26,
+    x86_pb_cs    = x86_pb_2e, /* branch not taken with Jcc */
+    x86_pb_ss    = x86_pb_36,
+    x86_pb_ds    = x86_pb_3e, /* branch taken with Jcc (CET notrack) */
+    x86_pb_rex_b = x86_pb_41,
+    x86_pb_rex_x = x86_pb_42,
+    x86_pb_rex_r = x86_pb_44,
+    x86_pb_rex_w = x86_pb_48,
+    x86_pb_evex  = x86_pb_62,
+    x86_pb_fs    = x86_pb_64,
+    x86_pb_gs    = x86_pb_65,
+    x86_pb_osize = x86_pb_66,
+    x86_pb_asize = x86_pb_67,
+    x86_pb_wait  = x86_pb_9b, /* x87 */
+    x86_pb_vex3  = x86_pb_c4,
+    x86_pb_vex2  = x86_pb_c5,
+    x86_pb_rex2  = x86_pb_d5,
+    x86_pb_lock  = x86_pb_f0,
+    x86_pb_repne = x86_pb_f2, /* REPNE/REPNZ and XACQUIRE */
+    x86_pb_rep   = x86_pb_f3, /* REP/REPE/REPZ and XRELEASE */
+};
+
+/*
+ * mod values
+ */
+
+enum
+{
+    x86_mod_disp0,
+    x86_mod_disp8,
+    x86_mod_dispw,
+    x86_mod_reg,
+};
+
+/*
+ * SIB scale
+ */
+
+enum
+{
+    x86_scale_1,
+    x86_scale_2,
+    x86_scale_4,
+    x86_scale_8,
+};
+
+/*
+ * segment
+ */
+
+enum
+{
+    x86_seg_none,
+    x86_seg_es,
+    x86_seg_cs,
+    x86_seg_ss,
+    x86_seg_ds,
+    x86_seg_fs,
+    x86_seg_gs
+};
+
+/*
+ * REX flags
+ */
+
+enum
+{
+    x86_rex_b = 0x01,
+    x86_rex_x = 0x02,
+    x86_rex_r = 0x04,
+    x86_rex_w = 0x08,
+};
+
+/*
+ * VEX prefix
+ */
+
+enum
+{
+    x86_pfx_none,
+    x86_pfx_66,
+    x86_pfx_f3,
+    x86_pfx_f2,
+    x86_pfx_9b,
+    x86_pfx_rexw = 8
+};
+
+/*
+ * VEX map
+ */
+
+enum
+{
+    x86_map_none,
+    x86_map_0f,
+    x86_map_0f38,
+    x86_map_0f3a,
+    x86_map_map4,
+    x86_map_map5,
+    x86_map_map6,
+};
+
+/*
+ * VEX length
+ */
+
+enum
+{
+    x86_vex_l0 = 0,
+    x86_vex_l1 = 1,
+    x86_vex_l2 = 2,
+    x86_vex_l3 = 3,
+    x86_vex_lz = x86_vex_l0,
+    x86_vex_lig = x86_vex_l3,
+    x86_vex_l128 = x86_vex_l0,
+    x86_vex_l256 = x86_vex_l1,
+    x86_vex_l512 = x86_vex_l2,
+};
+
+/*
+ * VEX width
+ */
+
+enum
+{
+    x86_vex_w0,
+    x86_vex_w1,
+};
+
+/*
+ * condition codes (x86)
+ *
+ * Several mnemonics are aliases for the same value (for example b, c and
+ * nae are all 2), so each value appears under every name listed for it.
+ */
+
+enum
+{
+    x86_cc_o     = 0,  /* overflow */
+    x86_cc_no    = 1,  /* no overflow */
+    x86_cc_b     = 2,  /* below */
+    x86_cc_c     = 2,  /* carry */
+    x86_cc_nae   = 2,  /* neither-above-nor-equal */
+    x86_cc_nb    = 3,  /* not-below */
+    x86_cc_nc    = 3,  /* not-carry */
+    x86_cc_ae    = 3,  /* above-or-equal */
+    x86_cc_e     = 4,  /* equal */
+    x86_cc_z     = 4,  /* zero */
+    x86_cc_ne    = 5,  /* not-equal */
+    x86_cc_nz    = 5,  /* not-zero */
+    x86_cc_be    = 6,  /* below-or-equal */
+    x86_cc_na    = 6,  /* not-above */
+    x86_cc_nbe   = 7,  /* neither-below-nor-equal */
+    x86_cc_a     = 7,  /* above */
+    x86_cc_s     = 8,  /* sign */
+    x86_cc_ns    = 9,  /* no-sign */
+    x86_cc_p     = 10, /* parity */
+    x86_cc_pe    = 10, /* parity-even */
+    x86_cc_np    = 11, /* no-parity */
+    x86_cc_po    = 11, /* parity-odd */
+    x86_cc_l     = 12, /* less */
+    x86_cc_nge   = 12, /* neither-greater-nor-equal */
+    x86_cc_nl    = 13, /* not-less */
+    x86_cc_ge    = 13, /* greater-or-equal */
+    x86_cc_le    = 14, /* less-or-equal */
+    x86_cc_ng    = 14, /* not-greater */
+    x86_cc_nle   = 15, /* neither-less-nor-equal */
+    x86_cc_g     = 15, /* greater */
+};
+
+/*
+ * condition codes (agnostic)
+ */
+
+enum
+{
+    x86_o        = 0,  /* overflow (signed) */
+    x86_no       = 1,  /* not-overflow (signed) */
+    x86_c        = 2,  /* carry */
+    x86_nc       = 3,  /* not-carry */
+    x86_ltu      = 2,  /* less-than (unsigned) */
+    x86_geu      = 3,  /* greater-than-or-equal (unsigned) */
+    x86_eq       = 4,  /* equal */
+    x86_ne       = 5,  /* not-equal */
+    x86_leu      = 6,  /* less-than-or-equal (unsigned) */
+    x86_gtu      = 7,  /* greater-than (unsigned) */
+    x86_s        = 8,  /* sign */
+    x86_ns       = 9,  /* not-sign */
+    x86_lt       = 12, /* less-than (signed) */
+    x86_ge       = 13, /* greater-than-or-equal (signed) */
+    x86_le       = 14, /* less-than-or-equal (signed) */
+    x86_gt       = 15, /* greater-than (signed) */
+};
+
+/*
+ * compare packed operators
+ *
+ * oq = ordered quiet, os = ordered signalling
+ * uq = unordered quiet, us = unordered signalling
+ */
+
+enum
+{
+    x86_eq_oq    = 0x00,
+    x86_lt_os    = 0x01,
+    x86_le_os    = 0x02,
+    x86_unord_q  = 0x03,
+    x86_neq_uq   = 0x04,
+    x86_nlt_us   = 0x05,
+    x86_nle_us   = 0x06,
+    x86_ord_q    = 0x07,
+    x86_eq_uq    = 0x08,
+    x86_nge_us   = 0x09,
+    x86_ngt_us   = 0x0a,
+    x86_false_oq = 0x0b,
+    x86_neq_oq   = 0x0c,
+    x86_ge_os    = 0x0d,
+    x86_gt_os    = 0x0e,
+    x86_true_uq  = 0x0f,
+    x86_eq_os    = 0x10,
+    x86_lt_oq    = 0x11,
+    x86_le_oq    = 0x12,
+    x86_unord_s  = 0x13,
+    x86_neq_us   = 0x14,
+    x86_nlt_uq   = 0x15,
+    x86_nle_uq   = 0x16,
+    x86_ord_s    = 0x17,
+    x86_eq_us    = 0x18,
+    x86_nge_uq   = 0x19,
+    x86_ngt_uq   = 0x1a,
+    x86_false_os = 0x1b,
+    x86_neq_os   = 0x1c,
+    x86_ge_oq    = 0x1d,
+    x86_gt_oq    = 0x1e,
+    x86_true_us  = 0x1f,
+};
+
+/*
+ * prefix encoding structures
+ */
+
+/*
+ * ModRM struct
+ */
+
+struct x86_modrm
+{
+    union {
+        uchar data[1];
+        /* note: bitfield used for documentation purposes only */
+        struct {
+            /* [0:2] */ uchar rm:3;
+            /* [3:5] */ uchar reg:3;
+            /* [6:7] */ uchar mod:2;
+        };
+    };
+};
+
+/*
+ * ModRM values
+ */
+
+enum
+{
+    x86_rm_sp_sib       = 4,
+    x86_rm_bp_disp0     = 5,
+
+    x86_modrm_rm_mask   = 7,
+    x86_modrm_reg_shift = 3,
+    x86_modrm_reg_mask  = 7,
+    x86_modrm_mod_shift = 6,
+    x86_modrm_mod_mask  = 3,
+};
+
+/*
+ * ModRM accessors
+ *
+ * Each helper extracts one field of a ModRM byte using the
+ * x86_modrm_*_shift and x86_modrm_*_mask constants.
+ */
+
+/* rm field: the low bits selected by x86_modrm_rm_mask */
+static inline uint x86_modrm_rm(uchar modrm)
+{
+    return modrm & x86_modrm_rm_mask;
+}
+
+/* reg field */
+static inline uint x86_modrm_reg(uchar modrm)
+{
+    return (modrm >> x86_modrm_reg_shift) & x86_modrm_reg_mask;
+}
+
+/* mod field */
+static inline uint x86_modrm_mod(uchar modrm)
+{
+    return (modrm >> x86_modrm_mod_shift) & x86_modrm_mod_mask;
+}
+
+/*
+ * SIB struct
+ */
+
+struct x86_sib
+{
+    union {
+        uchar data[1];
+        /* note: bitfield used for documentation purposes only */
+        struct {
+            /* [0:2] */ uchar b : 3; /* base[0:2] */
+            /* [3:5] */ uchar x : 3; /* index[0:2] */
+            /* [6:7] */ uchar s : 2; /* scale[0:2] */
+        };
+    };
+};
+
+/*
+ * SIB values
+ */
+
+enum {
+    x86_sib_b_mask = 7,
+    x86_sib_x_shift = 3,
+    x86_sib_x_mask = 7,
+    x86_sib_s_shift = 6,
+    x86_sib_s_mask = 3
+};
+
+/*
+ * SIB accessors
+ *
+ * Each helper extracts one field of a SIB byte using the
+ * x86_sib_*_shift and x86_sib_*_mask constants.
+ */
+
+/* base field: the low bits selected by x86_sib_b_mask */
+static inline uint x86_sib_b(uchar sib)
+{
+    return sib & x86_sib_b_mask;
+}
+
+/* index field */
+static inline uint x86_sib_x(uchar sib)
+{
+    return (sib >> x86_sib_x_shift) & x86_sib_x_mask;
+}
+
+/* scale field */
+static inline uint x86_sib_s(uchar sib)
+{
+    return (sib >> x86_sib_s_shift) & x86_sib_s_mask;
+}
+
+/*
+ * REX struct
+ */
+
+struct x86_rex
+{
+    union {
+        uchar data[1];
+        /* note: bitfield used for documentation purposes only */
+        struct {
+            /* [0] */ uchar b3 : 1; /* base[3] */
+            /* [1] */ uchar x3 : 1; /* index[3] */
+            /* [2] */ uchar r3 : 1; /* reg[3] */
+            /* [3] */ uchar w  : 1; /* width */
+        };
+    };
+};
+
+
+/*
+ * REX2 struct
+ */
+
+struct x86_rex2
+{
+    union {
+        uchar data[1];
+        /* note: bitfield used for documentation purposes only */
+        struct {
+            /* [0] */ uchar b3 : 1; /* base[3] */
+            /* [1] */ uchar x3 : 1; /* index[3] */
+            /* [2] */ uchar r3 : 1; /* reg[3] */
+            /* [3] */ uchar w  : 1; /* width */
+            /* [4] */ uchar b4 : 1; /* base[4] */
+            /* [5] */ uchar x4 : 1; /* index[4] */
+            /* [6] */ uchar r4 : 1; /* reg[4] */
+            /* [7] */ uchar m0 : 1; /* map prefix 0x0f */
+        };
+    };
+};
+
+
+/*
+ * VEX2 struct
+ */
+
+struct x86_vex2
+{
+    union {
+        uchar data[1];
+        /* note: bitfield used for documentation purposes only */
+        struct {
+            /* [0:1] */ uchar p   : 2; /* see x86_pfx */
+            /*   [2] */ uchar l   : 1; /* len 0=128 1=256 */
+            /* [3:6] */ uchar vn  : 4; /* ~vec[0:3] */
+            /*   [7] */ uchar r3n : 1; /* ~reg[3] */
+        };
+    };
+};
+
+/*
+ * VEX3 struct
+ */
+
+struct x86_vex3
+{
+    union {
+        uchar data[2];
+        /* note: bitfield used for documentation purposes only */
+        struct {
+            /* [0:4] */ uchar m   : 5; /* see x86_map */
+            /*   [5] */ uchar b3n : 1; /* ~base[3] */
+            /*   [6] */ uchar x3n : 1; /* ~index[3] */
+            /*   [7] */ uchar r3n : 1; /* ~reg[3] */
+
+            /* [0:1] */ uchar p   : 2; /* see x86_pfx */
+            /*   [2] */ uchar l   : 1; /* len 0=128 1=256 */
+            /* [3:6] */ uchar vn  : 4; /* ~vec[0:3] */
+            /*   [7] */ uchar w   : 1; /* width */
+        };
+    };
+};
+
+/*
+ * EVEX struct
+ */
+
+struct x86_evex
+{
+    union {
+        uchar data[3];
+        /* note: bitfield used for documentation purposes only */
+        struct {
+            /* [0:2] */ uchar m   : 3; /* see x86_map */
+            /*   [3] */ uchar b4  : 1; /* base[4] */
+            /*   [4] */ uchar r4n : 1; /* ~reg[4] */
+            /*   [5] */ uchar b3n : 1; /* ~base[3] */
+            /*   [6] */ uchar x3n : 1; /* ~index[3] */
+            /*   [7] */ uchar r3n : 1; /* ~reg[3] */
+
+            /* [0:1] */ uchar p   : 2; /* see x86_pfx */
+            /*   [2] */ uchar x4n : 1; /* ~index[4] */
+            /* [3:6] */ uchar vn  : 4; /* ~vec[0:3] */
+            /*   [7] */ uchar w   : 1; /* width */
+
+            /* [0:2] */ uchar a   : 3; /* kmask[0:2] or map4.NF[2] no-flags */
+            /*   [3] */ uchar v4n : 1; /* ~vec[4] */
+            /*   [4] */ uchar br  : 1; /* broadcast or map4.ND[4] new-dest */
+            /* [5:6] */ uchar l   : 2; /* len 00=128 01=256, 10=512 */
+            /*   [7] */ uchar z   : 1; /* merge/zero */
+        };
+    };
+};
+
+/*
+ * opcode encoding
+ */
+
+enum
+{
+    x86_enc_w_shift          = 0,
+    x86_enc_m_shift          = x86_enc_w_shift + 3,
+    x86_enc_p_shift          = x86_enc_m_shift + 3,
+    x86_enc_l_shift          = x86_enc_p_shift + 4,
+    x86_enc_t_shift          = x86_enc_l_shift + 3,
+    x86_enc_o_shift          = x86_enc_t_shift + 2,
+    x86_enc_f_shift          = x86_enc_o_shift + 1,
+    x86_enc_i_shift          = x86_enc_f_shift + 3,
+    x86_enc_j_shift          = x86_enc_i_shift + 3,
+    x86_enc_r_shift          = x86_enc_j_shift + 2,
+    x86_enc_s_shift          = x86_enc_r_shift + 3,
+
+    x86_enc_w_w0             = (1 << x86_enc_w_shift),
+    x86_enc_w_w1             = (2 << x86_enc_w_shift),
+    x86_enc_w_wb             = (3 << x86_enc_w_shift),
+    x86_enc_w_wn             = (4 << x86_enc_w_shift),
+    x86_enc_w_wx             = (5 << x86_enc_w_shift),
+    x86_enc_w_ww             = (6 << x86_enc_w_shift),
+    x86_enc_w_wig            = (7 << x86_enc_w_shift),
+    x86_enc_w_mask           = (7 << x86_enc_w_shift),
+
+    x86_enc_m_none           = (0 << x86_enc_m_shift),
+    x86_enc_m_0f             = (1 << x86_enc_m_shift),
+    x86_enc_m_0f38           = (2 << x86_enc_m_shift),
+    x86_enc_m_0f3a           = (3 << x86_enc_m_shift),
+    x86_enc_m_map4           = (4 << x86_enc_m_shift),
+    x86_enc_m_map5           = (5 << x86_enc_m_shift),
+    x86_enc_m_map6           = (6 << x86_enc_m_shift),
+    x86_enc_m_mask           = (7 << x86_enc_m_shift),
+
+    x86_enc_p_none           = (0 << x86_enc_p_shift),
+    x86_enc_p_66             = (1 << x86_enc_p_shift),
+    x86_enc_p_f3             = (2 << x86_enc_p_shift),
+    x86_enc_p_f2             = (3 << x86_enc_p_shift),
+    x86_enc_p_9b             = (4 << x86_enc_p_shift),
+    x86_enc_p_rexw           = (8 << x86_enc_p_shift),
+    x86_enc_p_mask           = (7 << x86_enc_p_shift),
+    x86_enc_prexw_mask       = (15 << x86_enc_p_shift),
+
+    x86_enc_l_lz             = (1 << x86_enc_l_shift),
+    x86_enc_l_l0             = (2 << x86_enc_l_shift),
+    x86_enc_l_l1             = (3 << x86_enc_l_shift),
+    x86_enc_l_128            = (4 << x86_enc_l_shift),
+    x86_enc_l_256            = (5 << x86_enc_l_shift),
+    x86_enc_l_512            = (6 << x86_enc_l_shift),
+    x86_enc_l_lig            = (7 << x86_enc_l_shift),
+    x86_enc_l_mask           = (7 << x86_enc_l_shift),
+
+    x86_enc_t_none           = (0 << x86_enc_t_shift),
+    x86_enc_t_lex            = (1 << x86_enc_t_shift),
+    x86_enc_t_vex            = (2 << x86_enc_t_shift),
+    x86_enc_t_evex           = (3 << x86_enc_t_shift),
+    x86_enc_t_mask           = (3 << x86_enc_t_shift),
+
+    x86_enc_o_opcode_r       = (1 << x86_enc_o_shift), /* XX+r */
+    x86_enc_o_mask           = (1 << x86_enc_o_shift),
+
+    x86_enc_f_modrm_r        = (1 << x86_enc_f_shift), /* /r */
+    x86_enc_f_modrm_n        = (2 << x86_enc_f_shift), /* /N */
+    x86_enc_f_opcode         = (3 << x86_enc_f_shift), /* XX */
+    x86_enc_f_opcode_r       = (4 << x86_enc_f_shift), /* XX+r */
+    x86_enc_f_mask           = (7 << x86_enc_f_shift),
+
+    x86_enc_i_ib             = (1 << x86_enc_i_shift),
+    x86_enc_i_iw             = (2 << x86_enc_i_shift),
+    x86_enc_i_iwd            = (3 << x86_enc_i_shift),
+    x86_enc_i_i16            = (4 << x86_enc_i_shift),
+    x86_enc_i_i32            = (5 << x86_enc_i_shift),
+    x86_enc_i_i64            = (6 << x86_enc_i_shift),
+    x86_enc_i_mask           = (7 << x86_enc_i_shift),
+
+    x86_enc_j_ib             = (1 << x86_enc_j_shift),
+    x86_enc_j_i16            = (2 << x86_enc_j_shift),
+    x86_enc_j_mask           = (3 << x86_enc_j_shift),
+
+    x86_enc_r_rep            = (1 << x86_enc_r_shift),
+    x86_enc_r_lock           = (2 << x86_enc_r_shift),
+    x86_enc_r_norexb         = (4 << x86_enc_r_shift),
+    x86_enc_r_mask           = (7 << x86_enc_r_shift),
+
+    x86_enc_s_o16            = (1 << x86_enc_s_shift),
+    x86_enc_s_o32            = (2 << x86_enc_s_shift),
+    x86_enc_s_o64            = (3 << x86_enc_s_shift),
+    x86_enc_s_a16            = (4 << x86_enc_s_shift),
+    x86_enc_s_a32            = (5 << x86_enc_s_shift),
+    x86_enc_s_a64            = (6 << x86_enc_s_shift),
+    x86_enc_s_mask           = (7 << x86_enc_s_shift),
+
+    x86_enc_immediate_mask   = x86_enc_i_mask | x86_enc_j_mask,
+    x86_enc_suffix_mask      = x86_enc_r_mask | x86_enc_s_mask,
+    x86_enc_param_mask       = x86_enc_immediate_mask | x86_enc_suffix_mask
+};
+
+/*
+ * opcode encoding accessors
+ *
+ * The field accessors return the selected bits of 'enc' in place (they
+ * are masked, not shifted), so results compare directly against the
+ * x86_enc_* constants. The has_rep/has_lock/has_norexb helpers return a
+ * non-zero value when the flag bit is set; the has_o16..has_a64 helpers
+ * return 1 when the size-suffix field equals the named value, else 0.
+ */
+
+static inline uint x86_enc_width(uint enc)
+{
+    return enc & x86_enc_w_mask;
+}
+
+/* note: uses x86_enc_prexw_mask, so the rexw bit is included */
+static inline uint x86_enc_prefix(uint enc)
+{
+    return enc & x86_enc_prexw_mask;
+}
+
+static inline uint x86_enc_length(uint enc)
+{
+    return enc & x86_enc_l_mask;
+}
+
+static inline uint x86_enc_opcode(uint enc)
+{
+    return enc & x86_enc_o_mask;
+}
+
+static inline uint x86_enc_func(uint enc)
+{
+    return enc & x86_enc_f_mask;
+}
+
+static inline uint x86_enc_map(uint enc)
+{
+    return enc & x86_enc_m_mask;
+}
+
+static inline uint x86_enc_imm(uint enc)
+{
+    return enc & x86_enc_i_mask;
+}
+
+static inline uint x86_enc_imm2(uint enc)
+{
+    return enc & x86_enc_j_mask;
+}
+
+static inline uint x86_enc_type(uint enc)
+{
+    return enc & x86_enc_t_mask;
+}
+
+static inline uint x86_enc_suffix(uint enc)
+{
+    return enc & x86_enc_suffix_mask;
+}
+
+/* everything except the immediate and suffix parameter fields */
+static inline uint x86_enc_leading(uint enc)
+{
+    return enc & ~x86_enc_param_mask;
+}
+
+static inline uint x86_enc_has_rep(uint enc)
+{
+    return enc & x86_enc_r_rep;
+}
+
+static inline uint x86_enc_has_lock(uint enc)
+{
+    return enc & x86_enc_r_lock;
+}
+
+static inline uint x86_enc_has_norexb(uint enc)
+{
+    return enc & x86_enc_r_norexb;
+}
+
+static inline uint x86_enc_has_o16(uint enc)
+{
+    return (enc & x86_enc_s_mask) == x86_enc_s_o16;
+}
+
+static inline uint x86_enc_has_o32(uint enc)
+{
+    return (enc & x86_enc_s_mask) == x86_enc_s_o32;
+}
+
+static inline uint x86_enc_has_o64(uint enc)
+{
+    return (enc & x86_enc_s_mask) == x86_enc_s_o64;
+}
+
+static inline uint x86_enc_has_a16(uint enc)
+{
+    return (enc & x86_enc_s_mask) == x86_enc_s_a16;
+}
+
+static inline uint x86_enc_has_a32(uint enc)
+{
+    return (enc & x86_enc_s_mask) == x86_enc_s_a32;
+}
+
+static inline uint x86_enc_has_a64(uint enc)
+{
+    return (enc & x86_enc_s_mask) == x86_enc_s_a64;
+}
+
+/*
+ * operand encoding
+ */
+
+enum
+{
+    /*
+     * An operand descriptor packs the flag bits [0:7] together with the
+     * bit fields operand type [8:11], operand size [12:15], memory size
+     * [16:19], auxiliary value [20:23], element type [24:25], element
+     * width [26:28] and element count [29:31].
+     */
+    x86_opr_none,
+
+    /* operand field shifts */
+    x86_opr_ot               = 8,
+    x86_opr_os               = x86_opr_ot + 4,
+    x86_opr_ms               = x86_opr_os + 4,
+    x86_opr_av               = x86_opr_ms + 4,
+    x86_opr_et               = x86_opr_av + 4,
+    x86_opr_ew               = x86_opr_et + 2,
+    x86_opr_ec               = x86_opr_ew + 3,
+
+    /*
+     * NOTE(review): the element count field reaches bit 31, so
+     * (7 << x86_opr_ec) and the x86_opr_ec_x8 .. x86_opr_ec_x64 values
+     * further down do not fit in a 32-bit signed int; consider unsigned
+     * constants here.
+     */
+    x86_opr_ot_mask          = (15 << x86_opr_ot), /* operand type */
+    x86_opr_os_mask          = (15 << x86_opr_os), /* operand size */
+    x86_opr_ms_mask          = (15 << x86_opr_ms), /* memory size */
+    x86_opr_av_mask          = (15 << x86_opr_av), /* auxiliary value */
+    x86_opr_et_mask          = (3 << x86_opr_et), /* element type */
+    x86_opr_ew_mask          = (7 << x86_opr_ew), /* element width */
+    x86_opr_ec_mask          = (7 << x86_opr_ec), /* element count */
+
+    /* operand flags */
+    x86_opr_mem              = (1 << 0),
+    x86_opr_ind              = (1 << 1),
+    x86_opr_bcst             = (1 << 2),
+    x86_opr_flag_er          = (1 << 3),
+    x86_opr_flag_k           = (1 << 4),
+    x86_opr_flag_sae         = (1 << 5),
+    x86_opr_flag_z           = (1 << 6),
+    x86_opr_flag_rs          = (1 << 7),
+
+    /* operand type */
+    x86_opr_const            = (1 << x86_opr_ot), /* constant */
+    x86_opr_imm              = (2 << x86_opr_ot), /* immediate */
+    x86_opr_reg              = (3 << x86_opr_ot), /* register */
+    x86_opr_vec              = (4 << x86_opr_ot), /* vector */
+    x86_opr_mmx              = (5 << x86_opr_ot), /* mmx */
+    x86_opr_st               = (6 << x86_opr_ot), /* x87 */
+    x86_opr_k                = (7 << x86_opr_ot), /* mask */
+    x86_opr_seg              = (8 << x86_opr_ot), /* segment */
+    x86_opr_creg             = (9 << x86_opr_ot), /* control */
+    x86_opr_dreg             = (10 << x86_opr_ot), /* debug */
+    x86_opr_bnd              = (11 << x86_opr_ot), /* bound */
+    x86_opr_type_mask        = x86_opr_ot_mask,
+
+    /* operand sizes */
+    x86_opr_size_8           = (1 << x86_opr_os),
+    x86_opr_size_16          = (2 << x86_opr_os),
+    x86_opr_size_32          = (3 << x86_opr_os),
+    x86_opr_size_64          = (4 << x86_opr_os),
+    x86_opr_size_128         = (5 << x86_opr_os),
+    x86_opr_size_256         = (6 << x86_opr_os),
+    x86_opr_size_512         = (7 << x86_opr_os),
+    x86_opr_size_1024        = (8 << x86_opr_os),
+    x86_opr_size_80          = (9 << x86_opr_os),
+    x86_opr_size_wd          = (13 << x86_opr_os),
+    x86_opr_size_w           = (14 << x86_opr_os),
+    x86_opr_size_a           = (15 << x86_opr_os),
+    x86_opr_size_mask        = x86_opr_os_mask,
+
+    /* sized register */
+    x86_opr_r8               = x86_opr_reg | x86_opr_size_8,
+    x86_opr_r16              = x86_opr_reg | x86_opr_size_16,
+    x86_opr_r32              = x86_opr_reg | x86_opr_size_32,
+    x86_opr_r64              = x86_opr_reg | x86_opr_size_64,
+    x86_opr_rw               = x86_opr_reg | x86_opr_size_w,
+    x86_opr_ra               = x86_opr_reg | x86_opr_size_a,
+    x86_opr_mm               = x86_opr_vec | x86_opr_size_64,
+    x86_opr_xmm              = x86_opr_vec | x86_opr_size_128,
+    x86_opr_ymm              = x86_opr_vec | x86_opr_size_256,
+    x86_opr_zmm              = x86_opr_vec | x86_opr_size_512,
+
+    /* sized memory */
+    x86_opr_m8               = x86_opr_mem | (1 << x86_opr_ms), /* byte */
+    x86_opr_m16              = x86_opr_mem | (2 << x86_opr_ms), /* word */
+    x86_opr_m32              = x86_opr_mem | (3 << x86_opr_ms), /* dword */
+    x86_opr_m64              = x86_opr_mem | (4 << x86_opr_ms), /* qword */
+    x86_opr_m128             = x86_opr_mem | (5 << x86_opr_ms), /* oword/xmmword */
+    x86_opr_m256             = x86_opr_mem | (6 << x86_opr_ms), /* ymmword */
+    x86_opr_m512             = x86_opr_mem | (7 << x86_opr_ms), /* zmmword */
+    x86_opr_m1024            = x86_opr_mem | (8 << x86_opr_ms),
+    x86_opr_m80              = x86_opr_mem | (9 << x86_opr_ms), /* tword/tbyte */
+    x86_opr_m384             = x86_opr_mem | (10 << x86_opr_ms),
+    x86_opr_mib              = x86_opr_mem | (11 << x86_opr_ms), /* bound */
+    x86_opr_vm32             = x86_opr_mem | (12 << x86_opr_ms), /* vecmem32 */
+    x86_opr_vm64             = x86_opr_mem | (13 << x86_opr_ms), /* vecmem64 */
+    x86_opr_mw               = x86_opr_mem | (14 << x86_opr_ms), /* 16/32/64 */
+    x86_opr_mp               = x86_opr_mem | (15 << x86_opr_ms), /* pointer */
+    x86_opr_mem_mask         = x86_opr_mem | (15 << x86_opr_ms),
+
+    /* unsized register / sized memory */
+    x86_opr_r_m8             = x86_opr_reg | x86_opr_m8,
+    x86_opr_r_m16            = x86_opr_reg | x86_opr_m16,
+    x86_opr_r_m32            = x86_opr_reg | x86_opr_m32,
+    x86_opr_r_m64            = x86_opr_reg | x86_opr_m64,
+
+    /* sized register / sized memory */
+    x86_opr_r8_m8            = x86_opr_r8  | x86_opr_m8,
+    x86_opr_r16_m8           = x86_opr_r16 | x86_opr_m8,
+    x86_opr_r16_m16          = x86_opr_r16 | x86_opr_m16,
+    x86_opr_r32_m8           = x86_opr_r32 | x86_opr_m8,
+    x86_opr_r32_m16          = x86_opr_r32 | x86_opr_m16,
+    x86_opr_r32_m32          = x86_opr_r32 | x86_opr_m32,
+    x86_opr_r64_m8           = x86_opr_r64 | x86_opr_m8,
+    x86_opr_r64_m16          = x86_opr_r64 | x86_opr_m16,
+    x86_opr_r64_m32          = x86_opr_r64 | x86_opr_m32,
+    x86_opr_r64_m64          = x86_opr_r64 | x86_opr_m64,
+    x86_opr_rw_mw            = x86_opr_rw  | x86_opr_mw,
+
+    /* sized vector / sized memory */
+    x86_opr_mm_m32           = x86_opr_mm  | x86_opr_m32,
+    x86_opr_mm_m64           = x86_opr_mm  | x86_opr_m64,
+    x86_opr_xmm_m8           = x86_opr_xmm | x86_opr_m8,
+    x86_opr_xmm_m16          = x86_opr_xmm | x86_opr_m16,
+    x86_opr_xmm_m32          = x86_opr_xmm | x86_opr_m32,
+    x86_opr_xmm_m64          = x86_opr_xmm | x86_opr_m64,
+    x86_opr_xmm_m128         = x86_opr_xmm | x86_opr_m128,
+    x86_opr_ymm_m256         = x86_opr_ymm | x86_opr_m256,
+    x86_opr_zmm_m512         = x86_opr_zmm | x86_opr_m512,
+
+    /* sized vector memory */
+    x86_opr_vm32x            = x86_opr_xmm | x86_opr_vm32,
+    x86_opr_vm32y            = x86_opr_ymm | x86_opr_vm32,
+    x86_opr_vm32z            = x86_opr_zmm | x86_opr_vm32,
+    x86_opr_vm64x            = x86_opr_xmm | x86_opr_vm64,
+    x86_opr_vm64y            = x86_opr_ymm | x86_opr_vm64,
+    x86_opr_vm64z            = x86_opr_zmm | x86_opr_vm64,
+
+    /* mask / sized memory */
+    x86_opr_k_m8             = x86_opr_k   | x86_opr_m8,
+    x86_opr_k_m16            = x86_opr_k   | x86_opr_m16,
+    x86_opr_k_m32            = x86_opr_k   | x86_opr_m32,
+    x86_opr_k_m64            = x86_opr_k   | x86_opr_m64,
+
+    /* bound / memory */
+    x86_opr_bnd_mem          = x86_opr_bnd | x86_opr_mem,
+
+    /* memory operand alias */
+    x86_opr_m16int           = x86_opr_m16,
+    x86_opr_m32fp            = x86_opr_m32,
+    x86_opr_m32int           = x86_opr_m32,
+    x86_opr_m64fp            = x86_opr_m64,
+    x86_opr_m64int           = x86_opr_m64,
+    x86_opr_m80bcd           = x86_opr_m80,
+    x86_opr_m80dec           = x86_opr_m80,
+    x86_opr_m80fp            = x86_opr_m80,
+
+    /* sized immediate */
+    x86_opr_ib               = x86_opr_imm | x86_opr_size_8,
+    x86_opr_i16              = x86_opr_imm | x86_opr_size_16,
+    x86_opr_i32              = x86_opr_imm | x86_opr_size_32,
+    x86_opr_i64              = x86_opr_imm | x86_opr_size_64,
+    x86_opr_iwd              = x86_opr_imm | x86_opr_size_wd,
+    x86_opr_iw               = x86_opr_imm | x86_opr_size_w,
+
+    /* memory offset */
+    x86_opr_moffs            = x86_opr_imm | x86_opr_mem | x86_opr_size_w,
+
+    /* constant */
+    x86_opr_1                = x86_opr_const | (1 << x86_opr_av),
+
+    /* registers sequential */
+    x86_opr_flag_rs2         = x86_opr_flag_rs | (1 << x86_opr_av),
+    x86_opr_flag_rs4         = x86_opr_flag_rs | (2 << x86_opr_av),
+
+    /* sized broadcast */
+    x86_opr_m16bcst          = x86_opr_bcst | (1 << x86_opr_av),
+    x86_opr_m32bcst          = x86_opr_bcst | (2 << x86_opr_av),
+    x86_opr_m64bcst          = x86_opr_bcst | (3 << x86_opr_av),
+    x86_opr_bcst_mask        = x86_opr_bcst | x86_opr_av_mask,
+
+    /* sized vector / sized memory / broadcast */
+    x86_opr_xmm_m32_m16bcst  = x86_opr_xmm_m32  | x86_opr_m16bcst,
+    x86_opr_xmm_m64_m16bcst  = x86_opr_xmm_m64  | x86_opr_m16bcst,
+    x86_opr_xmm_m64_m32bcst  = x86_opr_xmm_m64  | x86_opr_m32bcst,
+    x86_opr_xmm_m128_m16bcst = x86_opr_xmm_m128 | x86_opr_m16bcst,
+    x86_opr_xmm_m128_m32bcst = x86_opr_xmm_m128 | x86_opr_m32bcst,
+    x86_opr_xmm_m128_m64bcst = x86_opr_xmm_m128 | x86_opr_m64bcst,
+    x86_opr_ymm_m256_m16bcst = x86_opr_ymm_m256 | x86_opr_m16bcst,
+    x86_opr_ymm_m256_m32bcst = x86_opr_ymm_m256 | x86_opr_m32bcst,
+    x86_opr_ymm_m256_m64bcst = x86_opr_ymm_m256 | x86_opr_m64bcst,
+    x86_opr_zmm_m512_m16bcst = x86_opr_zmm_m512 | x86_opr_m16bcst,
+    x86_opr_zmm_m512_m32bcst = x86_opr_zmm_m512 | x86_opr_m32bcst,
+    x86_opr_zmm_m512_m64bcst = x86_opr_zmm_m512 | x86_opr_m64bcst,
+
+    /* relative displacement */
+    x86_opr_rel8             = x86_opr_imm | x86_opr_ind | x86_opr_size_8,
+    x86_opr_relw             = x86_opr_imm | x86_opr_ind | x86_opr_size_w,
+
+    /* enter / far displacement */
+    x86_opr_far16_16         = x86_opr_imm | x86_opr_ind | (1 << x86_opr_av),
+    x86_opr_far16_32         = x86_opr_imm | x86_opr_ind | (2 << x86_opr_av),
+
+    /* far memory indirect */
+    x86_opr_memfar16_16      = x86_opr_mem | x86_opr_ind | (3 << x86_opr_av),
+    x86_opr_memfar16_32      = x86_opr_mem | x86_opr_ind | (4 << x86_opr_av),
+    x86_opr_memfar16_64      = x86_opr_mem | x86_opr_ind | (5 << x86_opr_av),
+
+    /* implicit register */
+    x86_opr_reg_a            = x86_opr_reg | x86_opr_ind | (1 << x86_opr_av),
+    x86_opr_reg_c            = x86_opr_reg | x86_opr_ind | (2 << x86_opr_av),
+    x86_opr_reg_d            = x86_opr_reg | x86_opr_ind | (3 << x86_opr_av),
+    x86_opr_reg_b            = x86_opr_reg | x86_opr_ind | (4 << x86_opr_av),
+    x86_opr_reg_si           = x86_opr_reg | x86_opr_ind | (5 << x86_opr_av),
+    x86_opr_reg_di           = x86_opr_reg | x86_opr_ind | (6 << x86_opr_av),
+    x86_opr_reg_ah           = x86_opr_reg | x86_opr_ind | (7 << x86_opr_av),
+    x86_opr_reg_v0           = x86_opr_vec | x86_opr_ind | (1 << x86_opr_av),
+    x86_opr_reg_st0          = x86_opr_st  | x86_opr_ind | (1 << x86_opr_av),
+    x86_opr_seg_es           = x86_opr_seg | x86_opr_ind | (1 << x86_opr_av),
+    x86_opr_seg_cs           = x86_opr_seg | x86_opr_ind | (2 << x86_opr_av),
+    x86_opr_seg_ss           = x86_opr_seg | x86_opr_ind | (3 << x86_opr_av),
+    x86_opr_seg_ds           = x86_opr_seg | x86_opr_ind | (4 << x86_opr_av),
+    x86_opr_seg_fs           = x86_opr_seg | x86_opr_ind | (5 << x86_opr_av),
+    x86_opr_seg_gs           = x86_opr_seg | x86_opr_ind | (6 << x86_opr_av),
+
+    /* sized implicit register */
+    x86_opr_reg_al           = x86_opr_reg_a | x86_opr_size_8,
+    x86_opr_reg_cl           = x86_opr_reg_c | x86_opr_size_8,
+    x86_opr_reg_ax           = x86_opr_reg_a | x86_opr_size_16,
+    x86_opr_reg_cx           = x86_opr_reg_c | x86_opr_size_16,
+    x86_opr_reg_dx           = x86_opr_reg_d | x86_opr_size_16,
+    x86_opr_reg_bx           = x86_opr_reg_b | x86_opr_size_16,
+    x86_opr_reg_eax          = x86_opr_reg_a | x86_opr_size_32,
+    x86_opr_reg_ecx          = x86_opr_reg_c | x86_opr_size_32,
+    x86_opr_reg_edx          = x86_opr_reg_d | x86_opr_size_32,
+    x86_opr_reg_ebx          = x86_opr_reg_b | x86_opr_size_32,
+    x86_opr_reg_rax          = x86_opr_reg_a | x86_opr_size_64,
+    x86_opr_reg_rcx          = x86_opr_reg_c | x86_opr_size_64,
+    x86_opr_reg_rdx          = x86_opr_reg_d | x86_opr_size_64,
+    x86_opr_reg_rbx          = x86_opr_reg_b | x86_opr_size_64,
+    x86_opr_reg_aw           = x86_opr_reg_a | x86_opr_size_w,
+    x86_opr_reg_cw           = x86_opr_reg_c | x86_opr_size_w,
+    x86_opr_reg_dw           = x86_opr_reg_d | x86_opr_size_w,
+    x86_opr_reg_bw           = x86_opr_reg_b | x86_opr_size_w,
+    x86_opr_reg_pa           = x86_opr_reg_a | x86_opr_size_a,
+    x86_opr_reg_pc           = x86_opr_reg_c | x86_opr_size_a,
+    x86_opr_reg_pd           = x86_opr_reg_d | x86_opr_size_a,
+    x86_opr_reg_pb           = x86_opr_reg_b | x86_opr_size_a,
+    x86_opr_reg_psi          = x86_opr_reg_si | x86_opr_size_a,
+    x86_opr_reg_pdi          = x86_opr_reg_di | x86_opr_size_a,
+    x86_opr_reg_xmm0         = x86_opr_reg_v0 | x86_opr_size_128,
+    x86_opr_reg_xmm0_7       = x86_opr_reg_v0 | x86_opr_size_1024,
+
+    /* element type */
+    x86_opr_et_none          = (0 << x86_opr_et),
+    x86_opr_et_i             = (1 << x86_opr_et),
+    x86_opr_et_f             = (2 << x86_opr_et),
+    x86_opr_et_bf            = (3 << x86_opr_et),
+
+    /* element width */
+    x86_opr_ew_8             = (1 << x86_opr_ew),
+    x86_opr_ew_16            = (2 << x86_opr_ew),
+    x86_opr_ew_32            = (3 << x86_opr_ew),
+    x86_opr_ew_64            = (4 << x86_opr_ew),
+    x86_opr_ew_128           = (5 << x86_opr_ew),
+    x86_opr_ew_256           = (6 << x86_opr_ew),
+    x86_opr_ew_512           = (7 << x86_opr_ew),
+
+    /* element count */
+    x86_opr_ec_x1            = (1 << x86_opr_ec),
+    x86_opr_ec_x2            = (2 << x86_opr_ec),
+    x86_opr_ec_x4            = (3 << x86_opr_ec),
+    x86_opr_ec_x8            = (4 << x86_opr_ec),
+    x86_opr_ec_x16           = (5 << x86_opr_ec),
+    x86_opr_ec_x32           = (6 << x86_opr_ec),
+    x86_opr_ec_x64           = (7 << x86_opr_ec),
+
+    /* tuple type */
+    x86_opr_i8x1             = x86_opr_et_i | x86_opr_ew_8 | x86_opr_ec_x1,
+    x86_opr_i8x2             = x86_opr_et_i | x86_opr_ew_8 | x86_opr_ec_x2,
+    x86_opr_i8x4             = x86_opr_et_i | x86_opr_ew_8 | x86_opr_ec_x4,
+    x86_opr_i8x8             = x86_opr_et_i | x86_opr_ew_8 | x86_opr_ec_x8,
+    x86_opr_i8x16            = x86_opr_et_i | x86_opr_ew_8 | x86_opr_ec_x16,
+    x86_opr_i8x32            = x86_opr_et_i | x86_opr_ew_8 | x86_opr_ec_x32,
+    x86_opr_i8x64            = x86_opr_et_i | x86_opr_ew_8 | x86_opr_ec_x64,
+    x86_opr_i16x1            = x86_opr_et_i | x86_opr_ew_16 | x86_opr_ec_x1,
+    x86_opr_i16x2            = x86_opr_et_i | x86_opr_ew_16 | x86_opr_ec_x2,
+    x86_opr_i16x4            = x86_opr_et_i | x86_opr_ew_16 | x86_opr_ec_x4,
+    x86_opr_i16x8            = x86_opr_et_i | x86_opr_ew_16 | x86_opr_ec_x8,
+    x86_opr_i16x16           = x86_opr_et_i | x86_opr_ew_16 | x86_opr_ec_x16,
+    x86_opr_i16x32           = x86_opr_et_i | x86_opr_ew_16 | x86_opr_ec_x32,
+    x86_opr_i32x1            = x86_opr_et_i | x86_opr_ew_32 | x86_opr_ec_x1,
+    x86_opr_i32x2            = x86_opr_et_i | x86_opr_ew_32 | x86_opr_ec_x2,
+    x86_opr_i32x4            = x86_opr_et_i | x86_opr_ew_32 | x86_opr_ec_x4,
+    x86_opr_i32x8            = x86_opr_et_i | x86_opr_ew_32 | x86_opr_ec_x8,
+    x86_opr_i32x16           = x86_opr_et_i | x86_opr_ew_32 | x86_opr_ec_x16,
+    x86_opr_i64x1            = x86_opr_et_i | x86_opr_ew_64 | x86_opr_ec_x1,
+    x86_opr_i64x2            = x86_opr_et_i | x86_opr_ew_64 | x86_opr_ec_x2,
+    x86_opr_i64x4            = x86_opr_et_i | x86_opr_ew_64 | x86_opr_ec_x4,
+    x86_opr_i64x8            = x86_opr_et_i | x86_opr_ew_64 | x86_opr_ec_x8,
+    x86_opr_i128x1           = x86_opr_et_i | x86_opr_ew_128 | x86_opr_ec_x1,
+    x86_opr_i128x2           = x86_opr_et_i | x86_opr_ew_128 | x86_opr_ec_x2,
+    x86_opr_i128x4           = x86_opr_et_i | x86_opr_ew_128 | x86_opr_ec_x4,
+    x86_opr_i256x1           = x86_opr_et_i | x86_opr_ew_256 | x86_opr_ec_x1,
+    x86_opr_i256x2           = x86_opr_et_i | x86_opr_ew_256 | x86_opr_ec_x2,
+    x86_opr_i512x1           = x86_opr_et_i | x86_opr_ew_512 | x86_opr_ec_x1,
+    x86_opr_f8x1             = x86_opr_et_f | x86_opr_ew_8 | x86_opr_ec_x1,
+    x86_opr_f8x2             = x86_opr_et_f | x86_opr_ew_8 | x86_opr_ec_x2,
+    x86_opr_f8x4             = x86_opr_et_f | x86_opr_ew_8 | x86_opr_ec_x4,
+    x86_opr_f8x8             = x86_opr_et_f | x86_opr_ew_8 | x86_opr_ec_x8,
+    x86_opr_f8x16            = x86_opr_et_f | x86_opr_ew_8 | x86_opr_ec_x16,
+    x86_opr_f8x32            = x86_opr_et_f | x86_opr_ew_8 | x86_opr_ec_x32,
+    x86_opr_f8x64            = x86_opr_et_f | x86_opr_ew_8 | x86_opr_ec_x64,
+    x86_opr_f16x1            = x86_opr_et_f | x86_opr_ew_16 | x86_opr_ec_x1,
+    x86_opr_f16x2            = x86_opr_et_f | x86_opr_ew_16 | x86_opr_ec_x2,
+    x86_opr_f16x4            = x86_opr_et_f | x86_opr_ew_16 | x86_opr_ec_x4,
+    x86_opr_f16x8            = x86_opr_et_f | x86_opr_ew_16 | x86_opr_ec_x8,
+    x86_opr_f16x16           = x86_opr_et_f | x86_opr_ew_16 | x86_opr_ec_x16,
+    x86_opr_f16x32           = x86_opr_et_f | x86_opr_ew_16 | x86_opr_ec_x32,
+    x86_opr_f32x1            = x86_opr_et_f | x86_opr_ew_32 | x86_opr_ec_x1,
+    x86_opr_f32x2            = x86_opr_et_f | x86_opr_ew_32 | x86_opr_ec_x2,
+    x86_opr_f32x4            = x86_opr_et_f | x86_opr_ew_32 | x86_opr_ec_x4,
+    x86_opr_f32x8            = x86_opr_et_f | x86_opr_ew_32 | x86_opr_ec_x8,
+    x86_opr_f32x16           = x86_opr_et_f | x86_opr_ew_32 | x86_opr_ec_x16,
+    x86_opr_f64x1            = x86_opr_et_f | x86_opr_ew_64 | x86_opr_ec_x1,
+    x86_opr_f64x2            = x86_opr_et_f | x86_opr_ew_64 | x86_opr_ec_x2,
+    x86_opr_f64x4            = x86_opr_et_f | x86_opr_ew_64 | x86_opr_ec_x4,
+    x86_opr_f64x8            = x86_opr_et_f | x86_opr_ew_64 | x86_opr_ec_x8,
+};
+
+/*
+ * operand encoding accessors
+ */
+
+/* Return 1 if the x86_opr_mem flag is set in opr, otherwise 0. */
+static inline uint x86_opr_has_mem(uint opr)
+{
+    return (opr & x86_opr_mem) ? 1 : 0;
+}
+/* Isolate the operand type bits of opr; the result is not shifted down. */
+static inline uint x86_opr_type_val(uint opr)
+{
+    const uint type_bits = x86_opr_type_mask & opr;
+
+    return type_bits;
+}
+/* Isolate the operand size bits of opr; the result is not shifted down. */
+static inline uint x86_opr_size_val(uint opr)
+{
+    const uint size_bits = x86_opr_size_mask & opr;
+
+    return size_bits;
+}
+/* Isolate the bits of opr covered by x86_opr_mem_mask, without shifting. */
+static inline uint x86_opr_mem_val(uint opr)
+{
+    const uint mem_bits = x86_opr_mem_mask & opr;
+
+    return mem_bits;
+}
+/* Isolate the bits of opr covered by x86_opr_bcst_mask, without shifting. */
+static inline uint x86_opr_bcst_val(uint opr)
+{
+    const uint bcst_bits = x86_opr_bcst_mask & opr;
+
+    return bcst_bits;
+}
+/* Isolate the element type bits of opr; the result is not shifted down. */
+static inline uint x86_opr_et_val(uint opr)
+{
+    const uint et_bits = x86_opr_et_mask & opr;
+
+    return et_bits;
+}
+/* Isolate the element count bits of opr; the result is not shifted down. */
+static inline uint x86_opr_ec_val(uint opr)
+{
+    const uint ec_bits = x86_opr_ec_mask & opr;
+
+    return ec_bits;
+}
+/* Isolate the element width bits of opr; the result is not shifted down. */
+static inline uint x86_opr_ew_val(uint opr)
+{
+    const uint ew_bits = x86_opr_ew_mask & opr;
+
+    return ew_bits;
+}
+
+/*
+ * Translate the memory encoding of opr into the matching x86_opr_size_*
+ * value. Encodings without an explicit case (x86_opr_mw included) yield
+ * x86_opr_size_w.
+ */
+static inline uint x86_opr_mem_size(uint opr)
+{
+    uint size = x86_opr_size_w;
+
+    switch (x86_opr_mem_val(opr)) {
+    case x86_opr_m8:
+        size = x86_opr_size_8;
+        break;
+    case x86_opr_m16:
+        size = x86_opr_size_16;
+        break;
+    case x86_opr_m32:
+        size = x86_opr_size_32;
+        break;
+    case x86_opr_m64:
+        size = x86_opr_size_64;
+        break;
+    case x86_opr_m80:
+        size = x86_opr_size_80;
+        break;
+    case x86_opr_m128:
+        size = x86_opr_size_128;
+        break;
+    case x86_opr_m256:
+        size = x86_opr_size_256;
+        break;
+    case x86_opr_m512:
+        size = x86_opr_size_512;
+        break;
+    default:
+        break;
+    }
+    return size;
+}
+
+/*
+ * Element width of opr in bytes (1 .. 64), or 0 when the element width
+ * field holds none of the x86_opr_ew_* values.
+ */
+static inline uint x86_opr_ew_bytes(uint opr)
+{
+    uint bytes = 0;
+
+    switch (x86_opr_ew_val(opr)) {
+    case x86_opr_ew_8:
+        bytes = 1;
+        break;
+    case x86_opr_ew_16:
+        bytes = 2;
+        break;
+    case x86_opr_ew_32:
+        bytes = 4;
+        break;
+    case x86_opr_ew_64:
+        bytes = 8;
+        break;
+    case x86_opr_ew_128:
+        bytes = 16;
+        break;
+    case x86_opr_ew_256:
+        bytes = 32;
+        break;
+    case x86_opr_ew_512:
+        bytes = 64;
+        break;
+    default:
+        break;
+    }
+    return bytes;
+}
+
+/*
+ * Convert the element width field of opr to the x86_opr_size_* constant
+ * of the same width, or 0 when no element width is encoded.
+ */
+static inline uint x86_opr_ew_size(uint opr)
+{
+    const uint ew = x86_opr_ew_val(opr);
+
+    if (ew == x86_opr_ew_8) {
+        return x86_opr_size_8;
+    }
+    if (ew == x86_opr_ew_16) {
+        return x86_opr_size_16;
+    }
+    if (ew == x86_opr_ew_32) {
+        return x86_opr_size_32;
+    }
+    if (ew == x86_opr_ew_64) {
+        return x86_opr_size_64;
+    }
+    if (ew == x86_opr_ew_128) {
+        return x86_opr_size_128;
+    }
+    if (ew == x86_opr_ew_256) {
+        return x86_opr_size_256;
+    }
+    if (ew == x86_opr_ew_512) {
+        return x86_opr_size_512;
+    }
+    return 0;
+}
+
+/*
+ * Element count of opr as a plain multiplier (1 .. 64), or 0 when the
+ * element count field holds none of the x86_opr_ec_* values.
+ */
+static inline uint x86_opr_ec_mult(uint opr)
+{
+    uint mult = 0;
+
+    switch (x86_opr_ec_val(opr)) {
+    case x86_opr_ec_x1:
+        mult = 1;
+        break;
+    case x86_opr_ec_x2:
+        mult = 2;
+        break;
+    case x86_opr_ec_x4:
+        mult = 4;
+        break;
+    case x86_opr_ec_x8:
+        mult = 8;
+        break;
+    case x86_opr_ec_x16:
+        mult = 16;
+        break;
+    case x86_opr_ec_x32:
+        mult = 32;
+        break;
+    case x86_opr_ec_x64:
+        mult = 64;
+        break;
+    default:
+        break;
+    }
+    return mult;
+}
+
+/*
+ * order encoding
+ */
+
+enum
+{
+    /*
+     * An order value is built from three fields: the type at bit
+     * x86_ord_s1 (3 bits), the flags at bit x86_ord_s2 (3 bits) and a
+     * type specific value starting at bit x86_ord_s3.
+     */
+    x86_ord_none,
+
+    /* field shifts */
+    x86_ord_s1               = 0,
+    x86_ord_s2               = x86_ord_s1 + 3,
+    x86_ord_s3               = x86_ord_s2 + 3,
+
+    /* order type */
+    x86_ord_const            = (1 << x86_ord_s1), /* - */
+    x86_ord_imm              = (2 << x86_ord_s1), /* I */
+    x86_ord_reg              = (3 << x86_ord_s1), /* R */
+    x86_ord_mrm              = (4 << x86_ord_s1), /* M */
+    x86_ord_vec              = (5 << x86_ord_s1), /* V */
+    x86_ord_opr              = (6 << x86_ord_s1), /* O */
+    x86_ord_type_mask        = (7 << x86_ord_s1),
+
+    /*
+     * order flags; presumably r = read, w = write, i = implicit
+     * -- TODO confirm against the table generator
+     */
+    x86_ord_r                = (1 << x86_ord_s2),
+    x86_ord_w                = (2 << x86_ord_s2),
+    x86_ord_rw               = (3 << x86_ord_s2),
+    x86_ord_i                = (4 << x86_ord_s2),
+    x86_ord_ri               = (5 << x86_ord_s2),
+    x86_ord_wi               = (6 << x86_ord_s2),
+    x86_ord_rwi              = (7 << x86_ord_s2),
+    x86_ord_flag_mask        = (7 << x86_ord_s2),
+
+    /* constant operands: x86_ord_const plus a selector in the s3 field */
+    x86_ord_one              = x86_ord_const | (1 << x86_ord_s3),
+    x86_ord_rax              = x86_ord_const | (2 << x86_ord_s3),
+    x86_ord_rcx              = x86_ord_const | (3 << x86_ord_s3),
+    x86_ord_rdx              = x86_ord_const | (4 << x86_ord_s3),
+    x86_ord_rbx              = x86_ord_const | (5 << x86_ord_s3),
+    x86_ord_rsp              = x86_ord_const | (6 << x86_ord_s3),
+    x86_ord_rbp              = x86_ord_const | (7 << x86_ord_s3),
+    x86_ord_rsi              = x86_ord_const | (8 << x86_ord_s3),
+    x86_ord_rdi              = x86_ord_const | (9 << x86_ord_s3),
+    x86_ord_st0              = x86_ord_const | (10 << x86_ord_s3),
+    x86_ord_stx              = x86_ord_const | (11 << x86_ord_s3),
+    x86_ord_seg              = x86_ord_const | (12 << x86_ord_s3),
+    x86_ord_xmm0             = x86_ord_const | (13 << x86_ord_s3),
+    x86_ord_xmm0_7           = x86_ord_const | (14 << x86_ord_s3),
+    x86_ord_mxcsr            = x86_ord_const | (15 << x86_ord_s3),
+    x86_ord_rflags           = x86_ord_const | (16 << x86_ord_s3),
+
+    /* x86_ord_mrm variant */
+    x86_ord_sib              = x86_ord_mrm | (1 << x86_ord_s3),
+
+    /* x86_ord_imm variants */
+    x86_ord_is4              = x86_ord_imm | (1 << x86_ord_s3),
+    x86_ord_ime              = x86_ord_imm | (2 << x86_ord_s3),
+};
+
+/*
+ * order encoding accessors
+ */
+
+/* Isolate the order type bits (x86_ord_type_mask) of ord. */
+static inline uint x86_ord_type_val(uint ord)
+{
+    const uint type_bits = x86_ord_type_mask & ord;
+
+    return type_bits;
+}
+
+/*
+ * codec flags
+ */
+
+enum
+{
+    /*
+     * Bit assignments of the flags word in struct x86_codec. The
+     * multi-bit fields are read with the x86_codec_field_*() accessors
+     * and the single-bit ones with x86_codec_has_*() / x86_codec_is*().
+     */
+
+    /* field shifts */
+    x86_ce_shift   = 0,
+    x86_cm_shift   = 3,
+    x86_ci_shift   = 5,
+    x86_cj_shift   = 8,
+
+    /* [0:2] encoding */
+    x86_ce_none    = (0 << x86_ce_shift),
+    x86_ce_rex     = (1 << x86_ce_shift),
+    x86_ce_rex2    = (2 << x86_ce_shift),
+    x86_ce_vex2    = (3 << x86_ce_shift),
+    x86_ce_vex3    = (4 << x86_ce_shift),
+    x86_ce_evex    = (5 << x86_ce_shift),
+    x86_ce_mask    = (7 << x86_ce_shift),
+
+    /* [3:4] map */
+    x86_cm_none    = (0 << x86_cm_shift),
+    x86_cm_0f      = (1 << x86_cm_shift),
+    x86_cm_0f38    = (2 << x86_cm_shift),
+    x86_cm_0f3a    = (3 << x86_cm_shift),
+    x86_cm_mask    = (3 << x86_cm_shift),
+
+    /* [5:7] imm */
+    x86_ci_none    = (0 << x86_ci_shift),
+    x86_ci_ib      = (1 << x86_ci_shift),
+    x86_ci_iw      = (2 << x86_ci_shift),
+    x86_ci_iwd     = (3 << x86_ci_shift),
+    x86_ci_i16     = (4 << x86_ci_shift),
+    x86_ci_i32     = (5 << x86_ci_shift),
+    x86_ci_i64     = (6 << x86_ci_shift),
+    x86_ci_mask    = (7 << x86_ci_shift),
+
+    /* [8:9] imm2 */
+    x86_cj_ib      = (1 << x86_cj_shift),
+    x86_cj_i16     = (2 << x86_cj_shift),
+    x86_cj_mask    = (3 << x86_cj_shift),
+
+    /* [10:15] prefixes */
+    x86_cp_osize   = (1 << 10), /* 0x66 */
+    x86_cp_asize   = (1 << 11), /* 0x67 */
+    x86_cp_wait    = (1 << 12), /* 0x9B */
+    x86_cp_lock    = (1 << 13), /* 0xF0 */
+    x86_cp_repne   = (1 << 14), /* 0xF2 */
+    x86_cp_rep     = (1 << 15), /* 0xF3 */
+
+    /* [16:18] flags */
+    x86_cf_modrm   = (1 << 16), /* tested by x86_codec_has_modrm() */
+    x86_cf_ia32    = (1 << 17), /* tested by x86_codec_is32() */
+    x86_cf_amd64   = (1 << 18), /* tested by x86_codec_is64() */
+};
+
+/*
+ * codec struct
+ */
+
+/*
+ * Codec record for one instruction: prefix encoding bytes, opcode bytes,
+ * ModRM and SIB, a flags word, and the displacement / immediate values.
+ */
+struct x86_codec
+{
+    /*
+     * prefix encoding bytes; the members share storage -- presumably the
+     * x86_ce_* value in flags tells which one is valid (TODO confirm)
+     */
+    union {
+        x86_rex rex;
+        x86_rex2 rex2;
+        x86_vex2 vex2;
+        x86_vex3 vex3;
+        x86_evex evex;
+    };
+
+    /*
+     * opcode bytes; opc0 aliases opc[0] and modrm is laid out after it
+     * (it overlays opc[1] assuming x86_modrm is a single byte)
+     */
+    union {
+        uchar opc[2];
+        struct {
+            uchar opc0;
+            x86_modrm modrm;
+        };
+    };
+
+    uchar opclen : 2;   /* presumably number of valid opc bytes -- TODO confirm */
+    uchar seg : 3;      /* presumably segment override selector -- TODO confirm */
+    uchar spare;
+    x86_sib sib;
+
+    uint flags;         /* x86_ce_, x86_cm_, x86_ci_, x86_cj_, x86_cp_, x86_cf_ bits */
+    ushort rec;
+    short imm2;         /* presumably the second immediate (x86_cj_*) -- TODO confirm */
+
+    /* disp32 + imm32 share storage with a single 64-bit immediate */
+    union {
+        struct {
+            int disp32;
+            int imm32;
+        };
+        struct {
+            ullong imm64;
+        };
+    };
+};
+
+/*
+ * codec accessors
+ */
+
+/* Encoding field (x86_ce_mask bits) of the codec flags, left in place. */
+static inline int x86_codec_field_ce(x86_codec *c)
+{
+    const uint flags = c->flags;
+
+    return flags & x86_ce_mask;
+}
+/* Map field (x86_cm_mask bits) of the codec flags, left in place. */
+static inline int x86_codec_field_cm(x86_codec *c)
+{
+    const uint flags = c->flags;
+
+    return flags & x86_cm_mask;
+}
+/* Immediate field (x86_ci_mask bits) of the codec flags, left in place. */
+static inline int x86_codec_field_ci(x86_codec *c)
+{
+    const uint flags = c->flags;
+
+    return flags & x86_ci_mask;
+}
+/* Second immediate field (x86_cj_mask bits) of the codec flags, in place. */
+static inline int x86_codec_field_cj(x86_codec *c)
+{
+    const uint flags = c->flags;
+
+    return flags & x86_cj_mask;
+}
+/* Return 1 if the x86_cp_wait prefix flag is set in c->flags, else 0. */
+static inline int x86_codec_has_wait(x86_codec *c)
+{
+    return (c->flags & x86_cp_wait) ? 1 : 0;
+}
+/* Return 1 if the x86_cp_lock prefix flag is set in c->flags, else 0. */
+static inline int x86_codec_has_lock(x86_codec *c)
+{
+    return (c->flags & x86_cp_lock) ? 1 : 0;
+}
+/* Return 1 if the x86_cp_rep prefix flag is set in c->flags, else 0. */
+static inline int x86_codec_has_rep(x86_codec *c)
+{
+    return (c->flags & x86_cp_rep) ? 1 : 0;
+}
+/* Return 1 if the x86_cp_repne prefix flag is set in c->flags, else 0. */
+static inline int x86_codec_has_repne(x86_codec *c)
+{
+    return (c->flags & x86_cp_repne) ? 1 : 0;
+}
+/* Return 1 if the x86_cp_osize prefix flag is set in c->flags, else 0. */
+static inline int x86_codec_has_osize(x86_codec *c)
+{
+    return (c->flags & x86_cp_osize) ? 1 : 0;
+}
+/* Return 1 if the x86_cp_asize prefix flag is set in c->flags, else 0. */
+static inline int x86_codec_has_asize(x86_codec *c)
+{
+    return (c->flags & x86_cp_asize) ? 1 : 0;
+}
+/* Return 1 if the x86_cf_modrm flag is set in c->flags, else 0. */
+static inline int x86_codec_has_modrm(x86_codec *c)
+{
+    return (c->flags & x86_cf_modrm) ? 1 : 0;
+}
+/* Return 1 when neither x86_cf_ia32 nor x86_cf_amd64 is set, else 0. */
+static inline int x86_codec_is16(x86_codec *c)
+{
+    const uint wide_modes = x86_cf_ia32 | x86_cf_amd64;
+
+    return !(c->flags & wide_modes);
+}
+/* Return 1 if the x86_cf_ia32 flag is set in c->flags, else 0. */
+static inline int x86_codec_is32(x86_codec *c)
+{
+    return (c->flags & x86_cf_ia32) ? 1 : 0;
+}
+/* Return 1 if the x86_cf_amd64 flag is set in c->flags, else 0. */
+static inline int x86_codec_is64(x86_codec *c)
+{
+    return (c->flags & x86_cf_amd64) ? 1 : 0;
+}
+
+/*
+ * modes
+ */
+
+/* operating mode bits; one bit per mode so that they can be OR-ed together */
+enum
+{
+    x86_modes_16 = (1 << 0),
+    x86_modes_32 = (1 << 1),
+    x86_modes_64 = (1 << 2),
+};
+
+/*
+ * modes accessors
+ */
+
+/* Return 1 if the x86_modes_16 bit is set in mode, else 0. */
+static inline int x86_mode_has16(uint mode)
+{
+    return (mode & x86_modes_16) ? 1 : 0;
+}
+/* Return 1 if the x86_modes_32 bit is set in mode, else 0. */
+static inline int x86_mode_has32(uint mode)
+{
+    return (mode & x86_modes_32) ? 1 : 0;
+}
+/* Return 1 if the x86_modes_64 bit is set in mode, else 0. */
+static inline int x86_mode_has64(uint mode)
+{
+    return (mode & x86_modes_64) ? 1 : 0;
+}
+
+/*
+ * memory operand
+ */
+
+/*
+ * Memory operand: base, index and scale packed into bitfields that can
+ * also be accessed as one word through code, plus a displacement.
+ */
+struct x86_mem
+{
+    union {
+        uint code;              /* all of base/index/scale as one word */
+        struct {
+            uint base : 9;      /* presumably a register id -- TODO confirm */
+            uint index : 9;     /* presumably a register id -- TODO confirm */
+            uint scale : 2;     /* presumably log2 of the index scale -- TODO confirm */
+        };
+    };
+    int disp;                   /* displacement */
+};
+
+/*
+ * opcode metadata
+ *
+ * type, prefix, map, opcode, mask, plus operand and order records.
+ *
+ * opcode - opcode number from opcode enum for name lookup
+ * mode   - operating mode (16 | 32 | 64)
+ * opr    - operand list (r8/m8, rw/mw, xmm/m128, etc)
+ * ord    - operand order (register, immediate, regmem, etc)
+ * enc    - operand encoding (type, width, prefix, map, immediate, etc)
+ * opc    - opcode, ModRM function byte or second opcode byte.
+ * opm    - opcode mask (f8 for XX+r), ModRM function or second byte mask.
+ *
+ * prefix and map are provisioned as 6 bits each to align the bitfield.
+ * there are 3 types, 10 prefixes (5 * 2), and 7 maps (up to map6).
+ */
+
+/* one opcode metadata record; see the field summary in the comment above */
+struct x86_opc_data
+{
+    ushort op;          /* opcode number from the opcode enum (name lookup) */
+    ushort mode;        /* operating mode (16 | 32 | 64) */
+    ushort opr;         /* operand list (r8/m8, rw/mw, xmm/m128, etc) */
+    ushort ord;         /* operand order (register, immediate, regmem, etc) */
+    uint enc;           /* operand encoding (type, width, prefix, map, ...) */
+    /* opcode, ModRM function byte or second opcode byte */
+    union {
+        uchar opc[2];
+        ushort opc_s;   /* both opc bytes viewed as one 16-bit value */
+    };
+    /* opcode mask (f8 for XX+r), ModRM function or second byte mask */
+    union {
+        uchar opm[2];
+        ushort opm_s;   /* both opm bytes viewed as one 16-bit value */
+    };
+};
+
+/* operand record: up to four operand encodings (x86_opr_* values) */
+struct x86_opr_data
+{
+    uint opr[4];
+};
+
+/* order record: up to four order encodings (x86_ord_* values) */
+struct x86_ord_data
+{
+    ushort ord[4];
+};
+
+/*
+ * ModRM encoder
+ */
+
+/*
+ * Build a ModRM byte: rm in bits [0:2], reg in bits [3:5] and mod in
+ * bits [6:7]. Only the low bits of each argument are used.
+ */
+static inline x86_modrm x86_enc_modrm(uint mod, uint reg, uint rm)
+{
+    const uint byte = ((mod & 3u) << 6) | ((reg & 7u) << 3) | (rm & 7u);
+    x86_modrm modrm = { .data = { byte } };
+
+    return modrm;
+}
+
+/*
+ * SIB encoder
+ */
+
+/*
+ * Build a SIB byte: b in bits [0:2], x in bits [3:5] and s in bits
+ * [6:7]. Only the low bits of each argument are used.
+ */
+static inline x86_sib x86_enc_sib(uint s, uint x, uint b)
+{
+    const uint byte = ((s & 3u) << 6) | ((x & 7u) << 3) | (b & 7u);
+    x86_sib sib = { .data = { byte } };
+
+    return sib;
+}
+
+/*
+ * REX encoder
+ */
+
+/*
+ * Build a REX byte (0x40 | W R X B). Bit 3 of b, x and r goes to bits
+ * 0, 1 and 2 respectively; bit 0 of w goes to bit 3.
+ */
+static inline x86_rex x86_enc_rex(uint w, uint r, uint x, uint b)
+{
+    const uint b3 = (b >> 3) & 1u;
+    const uint x3 = (x >> 3) & 1u;
+    const uint r3 = (r >> 3) & 1u;
+    const uint byte = 0x40 | ((w & 1u) << 3) | (r3 << 2) | (x3 << 1) | b3;
+    x86_rex rex = { .data = { byte } };
+
+    return rex;
+}
+
+/*
+ * REX2 encoder
+ */
+
+/*
+ * Build the REX2 payload byte. Bit 3 of b, x and r goes to bits 0, 1 and
+ * 2, bit 0 of w to bit 3, bit 4 of b, x and r to bits 4, 5 and 6, and
+ * bit 0 of m to bit 7.
+ */
+static inline x86_rex2 x86_enc_rex2(uint m, uint w, uint r, uint x, uint b)
+{
+    const uint b3 = (b >> 3) & 1u, b4 = (b >> 4) & 1u;
+    const uint x3 = (x >> 3) & 1u, x4 = (x >> 4) & 1u;
+    const uint r3 = (r >> 3) & 1u, r4 = (r >> 4) & 1u;
+    const uint byte = ((m & 1u) << 7) | (r4 << 6) | (x4 << 5) | (b4 << 4) |
+                      ((w & 1u) << 3) | (r3 << 2) | (x3 << 1) | b3;
+    x86_rex2 rex2 = { .data = { byte } };
+
+    return rex2;
+}
+
+/*
+ * VEX2 encoder
+ */
+
+/*
+ * Build the payload byte of a two-byte VEX prefix: p in bits [0:1], l in
+ * bit 2, the inverted low four bits of v in bits [3:6] and the inverted
+ * bit 3 of r in bit 7.
+ */
+static inline x86_vex2 x86_enc_vex2(uint p, uint l, uint r, uint v)
+{
+    /* r and v are stored inverted */
+    const uint nr = ~r, nv = ~v;
+    const uint byte = ((nr & 8u) << 4) | ((nv & 15u) << 3) |
+                      ((l & 1u) << 2) | (p & 3u);
+    x86_vex2 vex2 = { .data = { byte } };
+
+    return vex2;
+}
+
+/*
+ * VEX3 encoder
+ */
+
+/*
+ * Build the two payload bytes of a three-byte VEX prefix.
+ *
+ * byte 0: m in bits [0:4], inverted bit 3 of b, x and r in bits 5, 6, 7.
+ * byte 1: p in bits [0:1], l in bit 2, inverted low four bits of v in
+ *         bits [3:6], w in bit 7.
+ */
+static inline x86_vex3 x86_enc_vex3(uint m, uint p,
+    uint l, uint w, uint r, uint x, uint b, uint v)
+{
+    /* r, x, b and v are stored inverted */
+    const uint nr = ~r, nx = ~x, nb = ~b, nv = ~v;
+    const uint p0 = ((nr & 8u) << 4) | ((nx & 8u) << 3) | ((nb & 8u) << 2) |
+                    (m & 31u);
+    const uint p1 = ((w & 1u) << 7) | ((nv & 15u) << 3) | ((l & 1u) << 2) |
+                    (p & 3u);
+    x86_vex3 vex3 = { .data = { p0, p1 } };
+
+    return vex3;
+}
+
+/*
+ * EVEX encoder
+ */
+
+/*
+ * Build the three payload bytes of an EVEX prefix.
+ *
+ * byte 0: m in bits [0:2], bit 4 of b (not inverted) in bit 3, inverted
+ *         bit 4 of r in bit 4, inverted bit 3 of b, x, r in bits 5, 6, 7.
+ * byte 1: p in bits [0:1], inverted bit 4 of x in bit 2, inverted low
+ *         four bits of v in bits [3:6], w in bit 7.
+ * byte 2: k in bits [0:2], inverted bit 4 of v in bit 3, brd in bit 4,
+ *         l in bits [5:6], z in bit 7.
+ */
+static inline x86_evex x86_enc_evex(uint m, uint p,
+    uint l, uint w, uint r, uint x, uint b, uint v,
+    uint k, uint brd, uint z)
+{
+    /* r, x, b and v are stored inverted, except for bit 4 of b */
+    const uint nr = ~r, nx = ~x, nb = ~b, nv = ~v;
+    const uint p0 = ((nr & 8u) << 4) | ((nx & 8u) << 3) | ((nb & 8u) << 2) |
+                    (nr & 16u) | ((b & 16u) >> 1) | (m & 7u);
+    const uint p1 = ((w & 1u) << 7) | ((nv & 15u) << 3) |
+                    ((nx & 16u) >> 2) | (p & 3u);
+    const uint p2 = ((z & 1u) << 7) | ((l & 3u) << 5) | ((brd & 1u) << 4) |
+                    ((nv & 16u) >> 1) | (k & 7u);
+    x86_evex evex = { .data = { p0, p1, p2 } };
+
+    return evex;
+}
+
+/*
+ * table sort types
+ */
+
+/* selectors for the ordering of a table index */
+enum
+{
+    x86_sort_none,
+    x86_sort_numeric,
+    x86_sort_alpha
+};
+
+/*
+ * table encoding prefix types
+ */
+
+/* selectors for the encoding prefix family of a table */
+enum
+{
+    x86_table_none,
+    x86_table_lex,      /* LEX: legacy extension format */
+    x86_table_vex,
+    x86_table_evex
+};
+
+/*
+ * table sort indices
+ */
+
+/* an index array together with its element count */
+struct x86_table_idx
+{
+    size_t count;       /* presumably the number of entries in idx -- TODO confirm */
+    size_t *idx;
+};
+
+/*
+ * opcode index decode tables
+ */
+
+/* opcode map plus the acceleration entries used to index into it */
+struct x86_acc_idx
+{
+    size_t map_count;       /* presumably the number of entries in map */
+    x86_opc_data *map;
+    size_t acc_count;       /* presumably the number of entries in acc */
+    x86_acc_entry *acc;     /* indexed by x86_acc_lookup() */
+    uchar *page_offsets;    /* per page value, scaled by 256 in x86_acc_offset() */
+};
+
+/*
+ * opcode index acceleration entry
+ */
+
+/* acceleration entry packed into 32 bits */
+struct x86_acc_entry
+{
+    uint idx : 24;      /* presumably the first matching map index -- TODO confirm */
+    uint nent : 8;      /* presumably the number of entries at idx -- TODO confirm */
+};
+
+/*
+ * opcode acceleration functions
+ */
+
+/*
+ * Compose an acceleration page number: type in bits [0:1], prefix in bits
+ * [2:5] and map in bits [6:8]. Excess bits of each argument are dropped.
+ */
+static inline uint x86_acc_page(uint type, uint prefix, uint map)
+{
+    const uint type_bits = type & 3;
+    const uint prefix_bits = (prefix & 15) << 2;
+    const uint map_bits = (map & 7) << 6;
+
+    return map_bits | prefix_bits | type_bits;
+}
+
+/* Page offset stored for acc_page in idx->page_offsets, multiplied by 256. */
+static inline size_t x86_acc_offset(x86_acc_idx *idx, size_t acc_page)
+{
+    const size_t page_offset = idx->page_offsets[acc_page];
+
+    return page_offset << 8;
+}
+
+/* Address of the acceleration entry at position offset; no range check. */
+static inline x86_acc_entry *x86_acc_lookup(x86_acc_idx *idx, size_t offset)
+{
+    return &idx->acc[offset];
+}
+
+/*
+ * bitmap utility functions
+ */
+
+/* Index of the 64-bit word that holds bit i. */
+static inline size_t x86_bitmap_idx(size_t i)
+{
+    return i / 64;
+}
+/* Position of bit i inside its 64-bit word. */
+static inline size_t x86_bitmap_shift(size_t i)
+{
+    return i % 64;
+}
+
+/* Return bit i of bitmap as 0 or 1. */
+static inline int x86_bitmap_get(ullong *bitmap, size_t i)
+{
+    const ullong word = bitmap[x86_bitmap_idx(i)];
+    const size_t shift = x86_bitmap_shift(i);
+
+    return (int)((word >> shift) & 1);
+}
+
+/*
+ * OR value, shifted to the position of bit i, into bitmap. The word is
+ * only ever OR-ed, so a value of 0 leaves an already set bit set.
+ */
+static inline void x86_bitmap_set(ullong *bitmap, size_t i, int value)
+{
+    const size_t word = x86_bitmap_idx(i);
+    const ullong bits = (ullong)value << x86_bitmap_shift(i);
+
+    bitmap[word] |= bits;
+}
+
+/*
+ * context for encoder, decoder, formatter and parser
+ */
+
+/* shared state for the encoder, decoder, formatter and parser */
+struct x86_ctx
+{
+    uint mode;          /* presumably x86_modes_* bits -- TODO confirm */
+    x86_acc_idx *idx;   /* opcode index decode tables */
+};
+
+/*
+ * buffer
+ *
+ * simplified buffer with read (start) and write (end) cursors.
+ * capacity is managed by the caller: no bounds checking is performed.
+ */
+
+/* byte buffer with independent read and write positions */
+struct x86_buffer
+{
+    uchar *data;        /* backing storage supplied by the caller */
+    size_t start;       /* read cursor: offset of the next byte to read */
+    size_t end;         /* write cursor: offset of the next byte to write */
+};
+
+/*
+ * buffer functions
+ */
+
+/* Attach data to b and reset both the read and the write cursor to 0. */
+static inline void x86_buffer_init(x86_buffer *b, uchar *data)
+{
+    b->data = data;
+    b->start = b->end = 0;
+}
+
+/* Attach data to b with explicit read (start) and write (end) cursors. */
+static inline void x86_buffer_init_ex(x86_buffer *b, uchar *data,
+    size_t start, size_t end)
+{
+    b->data = data;
+    b->start = start;
+    b->end = end;
+}
+
+static inline size_t x86_buffer_read(x86_buffer *b, uchar *buf, size_t len)
+{
+    memcpy(buf, b->data + b->start, len);
+    b->start += len;
+    return len;
+}
+
+static inline size_t x86_buffer_unread(x86_buffer *b, size_t len)
+{
+    b->start -= len;
+    return len;
+}
+
+static inline size_t x86_buffer_write(x86_buffer *b, uchar *buf, size_t len)
+{
+    memcpy(b->data + b->end, buf, len);
+    b->end += len;
+    return len;
+}
+
+static inline size_t x86_buffer_unwrite(x86_buffer *b, size_t len)
+{
+    b->end -= len;
+    return len;
+}
+
+static inline size_t x86_out8(x86_buffer *buf, u8 v)
+{
+    return x86_buffer_write(buf, (void *)&v, sizeof(u8));
+}
+
+static inline size_t x86_out16(x86_buffer *buf, u16 v)
+{
+    u16 t = cpu_to_le16(v);
+    return x86_buffer_write(buf, (void *)&t, sizeof(u16));
+}
+
+static inline size_t x86_out32(x86_buffer *buf, u32 v)
+{
+    u32 t = cpu_to_le32(v);
+    return x86_buffer_write(buf, (void *)&t, sizeof(u32));
+}
+
+static inline size_t x86_out64(x86_buffer *buf, u64 v)
+{
+    u64 t = cpu_to_le64(v);
+    return x86_buffer_write(buf, (void *)&t, sizeof(u64));
+}
+
+static inline size_t x86_unput(x86_buffer *buf, size_t n)
+{
+    return x86_buffer_unwrite(buf, n);
+}
+
+static inline u8 x86_in8(x86_buffer *buf)
+{
+    u8 t = 0;
+    x86_buffer_read(buf, (void *)&t, sizeof(u8));
+    return t;
+}
+
+static inline u16 x86_in16(x86_buffer *buf)
+{
+    u16 t = 0;
+    x86_buffer_read(buf, (void *)&t, sizeof(u16));
+    return le16_to_cpu(t);
+}
+
+static inline u32 x86_in32(x86_buffer *buf)
+{
+    u32 t = 0;
+    x86_buffer_read(buf, (void *)&t, sizeof(u32));
+    return le32_to_cpu(t);
+}
+
+static inline u64 x86_in64(x86_buffer *buf)
+{
+    u64 t = 0;
+    x86_buffer_read(buf, (void *)&t, sizeof(u64));
+    return le64_to_cpu(t);
+}
+
+/*
+ * metadata tables
+ *
+ * declarations only; the arrays are defined elsewhere.
+ * NOTE(review): presumably they come from the generated
+ * disas/x86-tables.inc and each *_size constant holds the element
+ * count of the array with the matching name - confirm. x86_reg_names
+ * has no size constant declared here.
+ */
+
+extern const char *x86_reg_names[];
+extern const size_t x86_op_names_size;
+extern const char *x86_op_names[];
+extern const size_t x86_opc_table_size;
+extern const x86_opc_data x86_opc_table[];
+extern const size_t x86_opr_table_size;
+extern const x86_opr_data x86_opr_table[];
+extern const size_t x86_ord_table_size;
+extern const x86_ord_data x86_ord_table[];
+
+/*
+ * encoder, decoder, table lookup, disassembly
+ *
+ * prototypes only.
+ *
+ * NOTE(review): the functions taking (buf, len) presumably write text
+ * into the caller supplied buffer and return the number of characters
+ * produced - confirm against x86-core.c.
+ *
+ * x86_fmt_symbol is the callback type that x86_format_op_symbol
+ * accepts as sym_cb.
+ *
+ * x86_ctx_create returns an x86_ctx pointer for a mode; this patch
+ * passes x86_modes_16, x86_modes_32 or x86_modes_64. x86_ctx_destroy
+ * takes such a pointer. NOTE(review): presumably the context is heap
+ * allocated and x86_ctx_destroy frees it - confirm.
+ *
+ * x86_codec_write takes the codec by value, x86_codec_read by pointer.
+ *
+ * NOTE(review): the third parameter of x86_map_name is named mode;
+ * presumably it carries a map value - confirm.
+ */
+
+void x86_set_debug(uint d);
+size_t x86_mode_name(char *buf, size_t len, uint mode, const char *sep);
+size_t x86_map_name(char *buf, size_t len, uint mode, const char *sep);
+size_t x86_ord_name(char *buf, size_t len, uint ord, const char *sep);
+size_t x86_ord_mnem(char *buf, size_t len, const ushort *ord);
+size_t x86_opr_name(char *buf, size_t len, uint opr);
+size_t x86_enc_name(char *buf, size_t len, uint enc);
+const char *x86_reg_name(uint reg);
+const char *x86_table_type_name(uint type);
+const char *x86_table_map_name(uint map);
+const char *x86_table_prefix_name(uint prefix);
+int x86_enc_filter_rex(x86_rex prefix, uint enc);
+int x86_enc_filter_rex2(x86_rex2 prefix, uint enc);
+int x86_enc_filter_vex2(x86_vex2 prefix, uint enc);
+int x86_enc_filter_vex3(x86_vex3 prefix, uint enc);
+int x86_enc_filter_evex(x86_evex prefix, uint enc);
+x86_table_idx x86_opc_table_identity(void);
+x86_table_idx x86_opc_table_sorted(x86_table_idx tab, uint sort);
+x86_table_idx x86_opc_table_filter(x86_table_idx tab, uint modes);
+x86_opc_data *x86_table_lookup(x86_acc_idx *idx, const x86_opc_data *m);
+void x86_print_op(const x86_opc_data *d, uint compact, uint opcode);
+size_t x86_format_op(char *buf, size_t len, x86_ctx *ctx, x86_codec *c);
+typedef size_t (*x86_fmt_symbol)(char *buf, size_t buflen, x86_codec *c,
+    size_t pc_offset);
+size_t x86_format_op_symbol(char *buf, size_t buflen, x86_ctx *ctx,
+    x86_codec *c, size_t pc_offset, x86_fmt_symbol sym_cb);
+size_t x86_format_hex(char *buf, size_t len, uchar *data, size_t datalen);
+x86_ctx *x86_ctx_create(uint mode);
+void x86_ctx_destroy(x86_ctx *ctx);
+int x86_codec_write(x86_ctx *ctx, x86_buffer *buf, x86_codec c, size_t *len);
+int x86_codec_read(x86_ctx *ctx, x86_buffer *buf, x86_codec *c, size_t *len);
+
+/*
+ * registers and opcodes
+ */
+
+#include "disas/x86-enums.inc"
diff --git a/include/disas/dis-asm.h b/include/disas/dis-asm.h
index 3b50ecfb5409..fd8aa326948f 100644
--- a/include/disas/dis-asm.h
+++ b/include/disas/dis-asm.h
@@ -446,6 +446,7 @@ int print_insn_v850             (bfd_vma, disassemble_info*);
 int print_insn_tic30            (bfd_vma, disassemble_info*);
 int print_insn_microblaze       (bfd_vma, disassemble_info*);
 int print_insn_ia64             (bfd_vma, disassemble_info*);
+int print_insn_x86              (bfd_vma, disassemble_info*);
 int print_insn_xtensa           (bfd_vma, disassemble_info*);
 int print_insn_riscv32          (bfd_vma, disassemble_info*);
 int print_insn_riscv64          (bfd_vma, disassemble_info*);
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 1ca6307c72ef..8b53ba800b18 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -46,6 +46,7 @@
 #include "tcg/tcg-cpu.h"
 
 #include "disas/capstone.h"
+#include "disas/x86.h"
 #include "cpu-internal.h"
 
 static void x86_cpu_realizefn(DeviceState *dev, Error **errp);
@@ -8686,6 +8687,12 @@ static void x86_disas_set_info(CPUState *cs, disassemble_info *info)
                       : CS_MODE_16);
     info->cap_insn_unit = 1;
     info->cap_insn_split = 8;
+
+    info->print_insn = print_insn_x86;
+    info->private_data = x86_ctx_create(
+        env->hflags & HF_CS64_MASK ? x86_modes_64
+        : env->hflags & HF_CS32_MASK ? x86_modes_32
+        : x86_modes_16);
 }
 
 void x86_update_hflags(CPUX86State *env)
-- 
2.43.0
Re: [PATCH v3 4/4] x86-disas: add x86-mini disassembler implementation
Posted by Philippe Mathieu-Daudé 6 months ago
Hi Michael,

Minor comments inline.

On 14/5/25 09:39, Michael Clark wrote:
> the x86-mini library is a lightweight x86 encoder, decoder, and
> disassembler that uses extensions to the Intel instruction set
> metadata to encode modern VEX/EVEX instructions and legacy
> instructions with a parameterized LEX (legacy extension) format.
> 
> this patch adds the x86-tablegen.py script, the disassembler,
> a print_insn_x86 implementation plus host and target cpu stubs.
> 
> Signed-off-by: Michael Clark <michael@anarch128.org>
> ---
>   disas/disas-host.c      |    5 +
>   disas/meson.build       |   97 ++
>   disas/x86-core.c        | 2716 +++++++++++++++++++++++++++++++++++++++
>   disas/x86-disas.c       |   96 ++
>   disas/x86.h             | 1860 +++++++++++++++++++++++++++
>   include/disas/dis-asm.h |    1 +
>   target/i386/cpu.c       |    7 +
>   7 files changed, 4782 insertions(+)
>   create mode 100644 disas/x86-core.c
>   create mode 100644 disas/x86-disas.c
>   create mode 100644 disas/x86.h


> diff --git a/disas/x86-core.c b/disas/x86-core.c
> new file mode 100644
> index 000000000000..c4f7034e3420
> --- /dev/null
> +++ b/disas/x86-core.c
> @@ -0,0 +1,2716 @@
> +/*
> + * Copyright (c) 2024-2025 Michael Clark
> + *
> + * SPDX-License-Identifier: MIT
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included
> + * in all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
> + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + */
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <stdarg.h>
> +#include <string.h>
> +#include <limits.h>

No need for these 5 includes ...
> +
> +#include "qemu/osdep.h"

... as they are provided by "qemu/osdep.h".

> +#include "qemu/host-utils.h"
> +
> +#include "disas/x86.h"
> +#include "disas/x86-tables.inc"


> +x86_opr_formatter x86_format_intel_hex;
> +x86_opr_formatter x86_format_intel_dec;
> +x86_opr_formats x86_opr_formats_intel_hex;
> +x86_opr_formats x86_opr_formats_intel_dec;
> +
> +static uint debug;
> +
> +void x86_set_debug(uint d) { debug = d; }
> +

const

> +x86_map_str x86_mode_names[] =
> +{
> +    { x86_modes_64,             "64"               },
> +    { x86_modes_32,             "32"               },
> +    { x86_modes_16,             "16"               },
> +};
> +

const

> +x86_map_str x86_map_names[] =
> +{
> +    { x86_map_map6,            "map6"              },
> +    { x86_map_map5,            "map5"              },
> +    { x86_map_map4,            "map4"              },
> +    { x86_map_0f3a,            "0f3a"              },
> +    { x86_map_0f38,            "0f38"              },
> +    { x86_map_0f,              "0f"                },
> +};
> +

const

> +x86_map_str x86_ord_names[] =
> +{
> +    { x86_ord_rflags,           "rflags"           },
> +    { x86_ord_mxcsr,            "mxcsr"            },
> +    { x86_ord_xmm0_7,           "xmm0_7"           },
> +    { x86_ord_xmm0,             "xmm0"             },
> +    { x86_ord_seg,              "seg"              },
> +    { x86_ord_stx,              "stx"              },
> +    { x86_ord_st0,              "st0"              },
> +    { x86_ord_rdi,              "rdi"              },
> +    { x86_ord_rsi,              "rsi"              },
> +    { x86_ord_rbp,              "rbp"              },
> +    { x86_ord_rsp,              "rsp"              },
> +    { x86_ord_rbx,              "rbx"              },
> +    { x86_ord_rdx,              "rdx"              },
> +    { x86_ord_rcx,              "rcx"              },
> +    { x86_ord_rax,              "rax"              },
> +    { x86_ord_one,              "one"              },
> +    { x86_ord_ime,              "ime"              },
> +    { x86_ord_is4,              "is4"              },
> +    { x86_ord_sib,              "sib"              },
> +    { x86_ord_vec,              "vec"              },
> +    { x86_ord_opr,              "opr"              },
> +    { x86_ord_mrm,              "mrm"              },
> +    { x86_ord_reg,              "reg"              },
> +    { x86_ord_imm,              "imm"              },
> +    { x86_ord_rwi,              "rwi"              },
> +    { x86_ord_wi,               "wi"               },
> +    { x86_ord_ri,               "ri"               },
> +    { x86_ord_rw,               "rw"               },
> +    { x86_ord_i,                "i"                },
> +    { x86_ord_r,                "r"                },
> +    { x86_ord_w,                "w"                },
> +    { 0,                        NULL               },
> +};
> +

const

> +x86_map_str x86_opr_names[] =
> +{
> +    { x86_opr_bnd_mem,          "bnd/mem"          },
> +    { x86_opr_k_m64,            "k/m64"            },
> +    { x86_opr_k_m32,            "k/m32"            },
> +    { x86_opr_k_m16,            "k/m16"            },
> +    { x86_opr_k_m8,             "k/m8"             },
> +    { x86_opr_vm64z,            "vm64z"            },
> +    { x86_opr_vm64y,            "vm64y"            },
> +    { x86_opr_vm64x,            "vm64x"            },
> +    { x86_opr_vm32z,            "vm32z"            },
> +    { x86_opr_vm32y,            "vm32y"            },
> +    { x86_opr_vm32x,            "vm32x"            },
> +    { x86_opr_rw_mw,            "rw/mw"            },
> +    { x86_opr_r64_m64,          "r64/m64"          },
> +    { x86_opr_r64_m32,          "r64/m32"          },
> +    { x86_opr_r64_m16,          "r64/m16"          },
> +    { x86_opr_r64_m8,           "r64/m8"           },
> +    { x86_opr_r32_m32,          "r32/m32"          },
> +    { x86_opr_r32_m16,          "r32/m16"          },
> +    { x86_opr_r32_m8,           "r32/m8"           },
> +    { x86_opr_r16_m16,          "r16/m16"          },
> +    { x86_opr_r16_m8,           "r16/m8"           },
> +    { x86_opr_r8_m8,            "r8/m8"            },
> +    { x86_opr_zmm_m512_m64bcst, "zmm/m512/m64bcst" },
> +    { x86_opr_zmm_m512_m32bcst, "zmm/m512/m32bcst" },
> +    { x86_opr_zmm_m512_m16bcst, "zmm/m512/m16bcst" },
> +    { x86_opr_ymm_m256_m64bcst, "ymm/m256/m64bcst" },
> +    { x86_opr_ymm_m256_m32bcst, "ymm/m256/m32bcst" },
> +    { x86_opr_ymm_m256_m16bcst, "ymm/m256/m16bcst" },
> +    { x86_opr_xmm_m128_m64bcst, "xmm/m128/m64bcst" },
> +    { x86_opr_xmm_m128_m32bcst, "xmm/m128/m32bcst" },
> +    { x86_opr_xmm_m128_m16bcst, "xmm/m128/m16bcst" },
> +    { x86_opr_xmm_m64_m32bcst,  "xmm/m64/m32bcst"  },
> +    { x86_opr_xmm_m64_m16bcst,  "xmm/m64/m16bcst"  },
> +    { x86_opr_xmm_m32_m16bcst,  "xmm/m32/m16bcst"  },
> +    { x86_opr_zmm_m512,         "zmm/m512"         },
> +    { x86_opr_ymm_m256,         "ymm/m256"         },
> +    { x86_opr_xmm_m128,         "xmm/m128"         },
> +    { x86_opr_xmm_m64,          "xmm/m64"          },
> +    { x86_opr_xmm_m32,          "xmm/m32"          },
> +    { x86_opr_xmm_m16,          "xmm/m16"          },
> +    { x86_opr_xmm_m8,           "xmm/m8"           },
> +    { x86_opr_mm_m64,           "mm/m64"           },
> +    { x86_opr_mm_m32,           "mm/m32"           },
> +    { x86_opr_mp,               "mp"               },
> +    { x86_opr_mw,               "mw"               },
> +    { x86_opr_vm64,             "vm64"             },
> +    { x86_opr_vm32,             "vm32"             },
> +    { x86_opr_r_m64,            "r/m64"            },
> +    { x86_opr_r_m32,            "r/m32"            },
> +    { x86_opr_r_m16,            "r/m16"            },
> +    { x86_opr_r_m8,             "r/m8"             },
> +    { x86_opr_m64bcst,          "m64bcst"          },
> +    { x86_opr_m32bcst,          "m32bcst"          },
> +    { x86_opr_m16bcst,          "m16bcst"          },
> +    { x86_opr_mib,              "mib"              },
> +    { x86_opr_m384,             "m384"             },
> +    { x86_opr_m80,              "m80"              },
> +    { x86_opr_m512,             "m512"             },
> +    { x86_opr_m256,             "m256"             },
> +    { x86_opr_m128,             "m128"             },
> +    { x86_opr_m64,              "m64"              },
> +    { x86_opr_m32,              "m32"              },
> +    { x86_opr_m16,              "m16"              },
> +    { x86_opr_m8,               "m8"               },
> +    { x86_opr_seg_gs,           "gs"               },
> +    { x86_opr_seg_fs,           "fs"               },
> +    { x86_opr_seg_ds,           "ds"               },
> +    { x86_opr_seg_ss,           "ss"               },
> +    { x86_opr_seg_cs,           "cs"               },
> +    { x86_opr_seg_es,           "es"               },
> +    { x86_opr_reg_xmm0_7,       "xmm0_7"           },
> +    { x86_opr_reg_xmm0,         "xmm0"             },
> +    { x86_opr_reg_pdi,          "pdi"              },
> +    { x86_opr_reg_psi,          "psi"              },
> +    { x86_opr_reg_pb,           "pb"               },
> +    { x86_opr_reg_pd,           "pd"               },
> +    { x86_opr_reg_pc,           "pc"               },
> +    { x86_opr_reg_pa,           "pa"               },
> +    { x86_opr_reg_bw,           "bw"               },
> +    { x86_opr_reg_dw,           "dw"               },
> +    { x86_opr_reg_cw,           "cw"               },
> +    { x86_opr_reg_aw,           "aw"               },
> +    { x86_opr_reg_rbx,          "rbx"              },
> +    { x86_opr_reg_rdx,          "rdx"              },
> +    { x86_opr_reg_rcx,          "rcx"              },
> +    { x86_opr_reg_rax,          "rax"              },
> +    { x86_opr_reg_ebx,          "ebx"              },
> +    { x86_opr_reg_edx,          "edx"              },
> +    { x86_opr_reg_ecx,          "ecx"              },
> +    { x86_opr_reg_eax,          "eax"              },
> +    { x86_opr_reg_bx,           "bx"               },
> +    { x86_opr_reg_dx,           "dx"               },
> +    { x86_opr_reg_cx,           "cx"               },
> +    { x86_opr_reg_ax,           "ax"               },
> +    { x86_opr_reg_cl,           "cl"               },
> +    { x86_opr_reg_al,           "al"               },
> +    { x86_opr_reg_st0,          "st0"              },
> +    { x86_opr_reg_v0,           "v0"               },
> +    { x86_opr_reg_ah,           "ah"               },
> +    { x86_opr_reg_di,           "di"               },
> +    { x86_opr_reg_si,           "si"               },
> +    { x86_opr_reg_d,            "d"                },
> +    { x86_opr_reg_c,            "c"                },
> +    { x86_opr_reg_a,            "a"                },
> +    { x86_opr_memfar16_64,      "memfar16:64"      },
> +    { x86_opr_memfar16_32,      "memfar16:32"      },
> +    { x86_opr_memfar16_16,      "memfar16:16"      },
> +    { x86_opr_far16_32,         "far16:32"         },
> +    { x86_opr_far16_16,         "far16:16"         },
> +    { x86_opr_relw,             "relw"             },
> +    { x86_opr_rel8,             "rel8"             },
> +    { x86_opr_moffs,            "moffs"            },
> +    { x86_opr_1,                "1"                },
> +    { x86_opr_zmm,              "zmm"              },
> +    { x86_opr_ymm,              "ymm"              },
> +    { x86_opr_xmm,              "xmm"              },
> +    { x86_opr_mm,               "mm"               },
> +    { x86_opr_ra,               "ra"               },
> +    { x86_opr_rw,               "rw"               },
> +    { x86_opr_r64,              "r64"              },
> +    { x86_opr_r32,              "r32"              },
> +    { x86_opr_r16,              "r16"              },
> +    { x86_opr_r8,               "r8"               },
> +    { x86_opr_iw,               "iw"               },
> +    { x86_opr_iwd,              "iwd"              },
> +    { x86_opr_i64,              "i64"              },
> +    { x86_opr_i32,              "i32"              },
> +    { x86_opr_i16,              "i16"              },
> +    { x86_opr_ib,               "ib"               },
> +    { x86_opr_bnd,              "bnd"              },
> +    { x86_opr_dreg,             "dreg"             },
> +    { x86_opr_creg,             "creg"             },
> +    { x86_opr_seg,              "seg"              },
> +    { x86_opr_k,                "k"                },
> +    { x86_opr_st,               "st"               },
> +    { x86_opr_mmx,              "mmx"              },
> +    { x86_opr_vec,              "vec"              },
> +    { x86_opr_reg,              "reg"              },
> +    { x86_opr_imm,              "imm"              },
> +    { x86_opr_bcst,             "bcst"             },
> +    { x86_opr_mem,              "mem"              },
> +    { x86_opr_flag_er,          "{er}"             },
> +    { x86_opr_flag_k,           "{k}"              },
> +    { x86_opr_flag_sae,         "{sae}"            },
> +    { x86_opr_flag_z,           "{z}"              },
> +    { x86_opr_flag_rs2,         "{rs2}"            },
> +    { x86_opr_flag_rs4,         "{rs4}"            },
> +    { x86_opr_f64x8,            "/f64x8"           },
> +    { x86_opr_f64x4,            "/f64x4"           },
> +    { x86_opr_f64x2,            "/f64x2"           },
> +    { x86_opr_f64x1,            "/f64x1"           },
> +    { x86_opr_f32x16,           "/f32x16"          },
> +    { x86_opr_f32x8,            "/f32x8"           },
> +    { x86_opr_f32x4,            "/f32x4"           },
> +    { x86_opr_f32x2,            "/f32x2"           },
> +    { x86_opr_f32x1,            "/f32x1"           },
> +    { x86_opr_f16x32,           "/f16x32"          },
> +    { x86_opr_f16x16,           "/f16x16"          },
> +    { x86_opr_f16x8,            "/f16x8"           },
> +    { x86_opr_f16x4,            "/f16x4"           },
> +    { x86_opr_f16x2,            "/f16x2"           },
> +    { x86_opr_f16x1,            "/f16x1"           },
> +    { x86_opr_f8x64,            "/f8x64"           },
> +    { x86_opr_f8x32,            "/f8x32"           },
> +    { x86_opr_f8x16,            "/f8x16"           },
> +    { x86_opr_f8x8,             "/f8x8"            },
> +    { x86_opr_f8x4,             "/f8x4"            },
> +    { x86_opr_f8x2,             "/f8x2"            },
> +    { x86_opr_f8x1,             "/f8x1"            },
> +    { x86_opr_i512x1,           "/i512x1"          },
> +    { x86_opr_i256x2,           "/i256x2"          },
> +    { x86_opr_i256x1,           "/i256x1"          },
> +    { x86_opr_i128x4,           "/i128x4"          },
> +    { x86_opr_i128x2,           "/i128x2"          },
> +    { x86_opr_i128x1,           "/i128x1"          },
> +    { x86_opr_i64x8,            "/i64x8"           },
> +    { x86_opr_i64x4,            "/i64x4"           },
> +    { x86_opr_i64x2,            "/i64x2"           },
> +    { x86_opr_i64x1,            "/i64x1"           },
> +    { x86_opr_i32x16,           "/i32x16"          },
> +    { x86_opr_i32x8,            "/i32x8"           },
> +    { x86_opr_i32x4,            "/i32x4"           },
> +    { x86_opr_i32x2,            "/i32x2"           },
> +    { x86_opr_i32x1,            "/i32x1"           },
> +    { x86_opr_i16x32,           "/i16x32"          },
> +    { x86_opr_i16x16,           "/i16x16"          },
> +    { x86_opr_i16x8,            "/i16x8"           },
> +    { x86_opr_i16x4,            "/i16x4"           },
> +    { x86_opr_i16x2,            "/i16x2"           },
> +    { x86_opr_i16x1,            "/i16x1"           },
> +    { x86_opr_i8x64,            "/i8x64"           },
> +    { x86_opr_i8x32,            "/i8x32"           },
> +    { x86_opr_i8x16,            "/i8x16"           },
> +    { x86_opr_i8x8,             "/i8x8"            },
> +    { x86_opr_i8x4,             "/i8x4"            },
> +    { x86_opr_i8x2,             "/i8x2"            },
> +    { x86_opr_i8x1,             "/i8x1"            },
> +    { 0,                        NULL               },
> +};
> +

const

> +x86_map_str x86_enc_names[] =
> +{
> +    { x86_enc_r_norexb,         " .norexb"         },
> +    { x86_enc_r_lock,           " .lock"           },
> +    { x86_enc_r_rep,            " .rep"            },
> +    { x86_enc_s_a64,            " .a64"            },
> +    { x86_enc_s_a32,            " .a32"            },
> +    { x86_enc_s_a16,            " .a16"            },
> +    { x86_enc_s_o64,            " .o64"            },
> +    { x86_enc_s_o32,            " .o32"            },
> +    { x86_enc_s_o16,            " .o16"            },
> +    { x86_enc_j_i16,            " i16"             },
> +    { x86_enc_j_ib,             " ib"              },
> +    { x86_enc_i_i64,            " i64"             },
> +    { x86_enc_i_i32,            " i32"             },
> +    { x86_enc_i_i16,            " i16"             },
> +    { x86_enc_i_iwd,            " iwd"             },
> +    { x86_enc_i_iw,             " iw"              },
> +    { x86_enc_i_ib,             " ib"              },
> +    { x86_enc_f_opcode_r,       ""                 },
> +    { x86_enc_f_opcode,         ""                 },
> +    { x86_enc_f_modrm_n,        ""                 },
> +    { x86_enc_f_modrm_r,        ""                 },
> +    { x86_enc_o_opcode_r,       ""                 },
> +    { x86_enc_t_evex,           ".evex"            },
> +    { x86_enc_t_vex,            ".vex"             },
> +    { x86_enc_t_lex,            ".lex"             },
> +    { x86_enc_l_lig,            ".lig"             },
> +    { x86_enc_l_512,            ".512"             },
> +    { x86_enc_l_256,            ".256"             },
> +    { x86_enc_l_128,            ".128"             },
> +    { x86_enc_l_l1,             ".l1"              },
> +    { x86_enc_l_l0,             ".l0"              },
> +    { x86_enc_l_lz,             ".lz"              },
> +    { x86_enc_p_rexw,           ".w"               },
> +    { x86_enc_p_9b,             ".9b"              },
> +    { x86_enc_p_f2,             ".f2"              },
> +    { x86_enc_p_f3,             ".f3"              },
> +    { x86_enc_p_66,             ".66"              },
> +    { x86_enc_m_map6,           ".map6"            },
> +    { x86_enc_m_map5,           ".map5"            },
> +    { x86_enc_m_map4,           ".map4"            },
> +    { x86_enc_m_0f3a,           ".0f3a"            },
> +    { x86_enc_m_0f38,           ".0f38"            },
> +    { x86_enc_m_0f,             ".0f"              },
> +    { x86_enc_w_wig,            ".wig"             },
> +    { x86_enc_w_ww,             ".ww"              },
> +    { x86_enc_w_wx,             ".wx"              },
> +    { x86_enc_w_wn,             ".wn"              },
> +    { x86_enc_w_wb,             ".wb"              },
> +    { x86_enc_w_w1,             ".w1"              },
> +    { x86_enc_w_w0,             ".w0"              },
> +    { 0,                        NULL               },
> +};

> +
> +static void x86_print_row(size_t count, x86_table_col *cols)
> +{
> +    printf("|");
> +    for (size_t i = 0; i < count; i++) {
> +        printf(" %-*s |", cols[i].width, cols[i].data);
> +        g_free(cols[i].data);
> +    }
> +    printf("\n");

Shouldn't we use info->fprintf_func() in disas/ ?

> +}

const

> +x86_opr_formats x86_opr_formats_intel_hex =
> +{
> +    .ptr_rip            = "%s[rip]",
> +    .ptr_rip_disp       = "%s[rip %s 0x%x]",
> +    .ptr_reg            = "%s[%s]",
> +    .ptr_reg_disp       = "%s[%s %s 0x%x]",
> +    .ptr_reg_sreg       = "%s[%s + %d*%s]",
> +    .ptr_reg_sreg_disp  = "%s[%s + %d*%s %s 0x%x]",
> +    .ptr_reg_reg        = "%s[%s + %s]",
> +    .ptr_reg_reg_disp   = "%s[%s + %s %s 0x%x]",
> +    .ptr_sreg           = "%s[%d*%s]",
> +    .ptr_disp           = "%s[%s0x%x]",
> +    .ptr_imm64          = "%s[%s0x%llx]",
> +    .ptr_imm32          = "%s[%s0x%x]",
> +    .imm64              = "%s0x%llx",
> +    .imm32              = "%s0x%x",
> +    .reg                = "%s",
> +};
> +

const

> +x86_opr_formats x86_opr_formats_intel_dec =
> +{
> +    .ptr_rip            = "%s[rip]",
> +    .ptr_rip_disp       = "%s[rip %s %u]",
> +    .ptr_reg            = "%s[%s]",
> +    .ptr_reg_disp       = "%s[%s %s %u]",
> +    .ptr_reg_sreg       = "%s[%s + %d*%s]",
> +    .ptr_reg_sreg_disp  = "%s[%s + %d*%s %s %u]",
> +    .ptr_reg_reg        = "%s[%s + %s]",
> +    .ptr_reg_reg_disp   = "%s[%s + %s %s %u]",
> +    .ptr_sreg           = "%s[%d*%s]",
> +    .ptr_disp           = "%s[%s%u]",
> +    .ptr_imm64          = "%s[%s%llu]",
> +    .ptr_imm32          = "%s[%s%u]",
> +    .imm64              = "%s%llu",
> +    .imm32              = "%s%u",
> +    .reg                = "%s",
> +};
> +

const

> +x86_opr_formatter x86_format_intel_hex =
> +{
> +    .fmt_const = &x86_opr_intel_const_str,
> +    .fmt_imm = &x86_opr_intel_imm_hex_str,
> +    .fmt_reg = &x86_opr_intel_reg_str,
> +    .fmt_mrm = &x86_opr_intel_mrm_hex_str,
> +    .fmt_vec = &x86_opr_intel_vec_str,
> +    .fmt_opb = &x86_opr_intel_opb_str,
> +    .fmt_is4 = &x86_opr_intel_is4_str,
> +    .fmt_ime = &x86_opr_intel_ime_hex_str,
> +    .fmt_rel = &x86_opr_intel_rel_hex_str
> +};
> +

const

> +x86_opr_formatter x86_format_intel_dec =
> +{
> +    .fmt_const = &x86_opr_intel_const_str,
> +    .fmt_imm = &x86_opr_intel_imm_dec_str,
> +    .fmt_reg = &x86_opr_intel_reg_str,
> +    .fmt_mrm = &x86_opr_intel_mrm_dec_str,
> +    .fmt_vec = &x86_opr_intel_vec_str,
> +    .fmt_opb = &x86_opr_intel_opb_str,
> +    .fmt_is4 = &x86_opr_intel_is4_str,
> +    .fmt_ime = &x86_opr_intel_ime_dec_str,
> +    .fmt_rel = &x86_opr_intel_rel_dec_str
> +};
Re: [PATCH v3 4/4] x86-disas: add x86-mini disassembler implementation
Posted by Michael Clark 6 months ago
On 5/14/25 22:25, Philippe Mathieu-Daudé wrote:
> Hi Michael,
> 
> Minor comments inline.
> 
> On 14/5/25 09:39, Michael Clark wrote:

[snipped]

>> +
>> +static void x86_print_row(size_t count, x86_table_col *cols)
>> +{
>> +    printf("|");
>> +    for (size_t i = 0; i < count; i++) {
>> +        printf(" %-*s |", cols[i].width, cols[i].data);
>> +        g_free(cols[i].data);
>> +    }
>> +    printf("\n");
> 
> Shouldn't we use info->fprintf_func() in disas/ ?

this could be dead code which I could possibly remove from the patch.
the upstream has a metadata table tool that prints out metadata. an
alternative approach is to add the metadata table tools as utilities.

>> +}
> 
> const

the const changes all seem to make sense.


Re: [PATCH v3 4/4] x86-disas: add x86-mini disassembler implementation
Posted by Daniel P. Berrangé 6 months ago
On Wed, May 14, 2025 at 07:39:27PM +1200, Michael Clark wrote:
> the x86-mini library is a lightweight x86 encoder, decoder, and
> disassembler that uses extensions to the Intel instruction set
> metadata to encode modern VEX/EVEX instructions and legacy
> instructions with a parameterized LEX (legacy extension) format.
> 
> this patch adds the x86-tablegen.py script, the disassembler,
> a print_insn_x86 implementation plus host and target cpu stubs.
> 
> Signed-off-by: Michael Clark <michael@anarch128.org>
> ---
>  disas/disas-host.c      |    5 +
>  disas/meson.build       |   97 ++
>  disas/x86-core.c        | 2716 +++++++++++++++++++++++++++++++++++++++
>  disas/x86-disas.c       |   96 ++
>  disas/x86.h             | 1860 +++++++++++++++++++++++++++
>  include/disas/dis-asm.h |    1 +
>  target/i386/cpu.c       |    7 +
>  7 files changed, 4782 insertions(+)
>  create mode 100644 disas/x86-core.c
>  create mode 100644 disas/x86-disas.c
>  create mode 100644 disas/x86.h
> 

> diff --git a/disas/x86-core.c b/disas/x86-core.c
> new file mode 100644
> index 000000000000..c4f7034e3420
> --- /dev/null
> +++ b/disas/x86-core.c
> @@ -0,0 +1,2716 @@
> +/*
> + * Copyright (c) 2024-2025 Michael Clark
> + *
> + * SPDX-License-Identifier: MIT

Note that we expect contributions to be under GPL-2.0-or-later, unless
derived from existing code that forces use of a different license, which
needs to be explained in the commit message


With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
Re: [PATCH v3 4/4] x86-disas: add x86-mini disassembler implementation
Posted by Michael Clark 6 months ago
On 5/14/25 20:17, Daniel P. Berrangé wrote:
> On Wed, May 14, 2025 at 07:39:27PM +1200, Michael Clark wrote:
>> diff --git a/disas/x86-core.c b/disas/x86-core.c
>> new file mode 100644
>> index 000000000000..c4f7034e3420
>> --- /dev/null
>> +++ b/disas/x86-core.c
>> @@ -0,0 +1,2716 @@
>> +/*
>> + * Copyright (c) 2024-2025 Michael Clark
>> + *
>> + * SPDX-License-Identifier: MIT
> 
> Note that we expect contributions to be under GPL-2.0-or-later, unless
> derived from existing code that forces use of a different license, which
> needs to be explained in the commit message

okay no problem, I can do that. there is a freestanding external origin:

https://github.com/michaeljclark/x86

I had a re-spin sitting in my queue for some time and I didn't have any
changes pending so I thought I would send it out. I would have folks
try it in private if they were considering merging it in the future.
one must balance the weight of future maintenance. I know that capstone
can be used to do the same so it is more of an FYI at this point.

Michael.

Re: [PATCH v3 4/4] x86-disas: add x86-mini disassembler implementation
Posted by Daniel P. Berrangé 6 months ago
On Wed, May 14, 2025 at 09:23:58PM +1200, Michael Clark wrote:
> On 5/14/25 20:17, Daniel P. Berrangé wrote:
> > On Wed, May 14, 2025 at 07:39:27PM +1200, Michael Clark wrote:
> > > diff --git a/disas/x86-core.c b/disas/x86-core.c
> > > new file mode 100644
> > > index 000000000000..c4f7034e3420
> > > --- /dev/null
> > > +++ b/disas/x86-core.c
> > > @@ -0,0 +1,2716 @@
> > > +/*
> > > + * Copyright (c) 2024-2025 Michael Clark
> > > + *
> > > + * SPDX-License-Identifier: MIT
> > 
> > Note that we expect contributions to be under GPL-2.0-or-later, unless
> > derived from existing code that forces use of a different license, which
> > needs to be explained in the commit message
> 
> okay no problem, I can do that. there is a freestanding external origin:
> 
> https://github.com/michaeljclark/x86

IIUC, that would only apply to the x86-core.c file - the other files
tagged with MIT look like they were written just for QEMU inclusion.

> I had a re-spin sitting in my queue for some time and I didn't have any
> changes pending so I thought I would send it out. I would have folks
> try it in private if they were considering merging it in the future.
> one must balance the weight of future maintenance. I know that capstone
> can be used to do the same so it is more of an FYI at this point.

Ok

With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|


Re: [PATCH v3 4/4] x86-disas: add x86-mini disassembler implementation
Posted by Michael Clark 6 months ago
On 5/14/25 21:33, Daniel P. Berrangé wrote:
> On Wed, May 14, 2025 at 09:23:58PM +1200, Michael Clark wrote:
>> On 5/14/25 20:17, Daniel P. Berrangé wrote:
>>> On Wed, May 14, 2025 at 07:39:27PM +1200, Michael Clark wrote:
>>>> diff --git a/disas/x86-core.c b/disas/x86-core.c
>>>> new file mode 100644
>>>> index 000000000000..c4f7034e3420
>>>> --- /dev/null
>>>> +++ b/disas/x86-core.c
>>>> @@ -0,0 +1,2716 @@
>>>> +/*
>>>> + * Copyright (c) 2024-2025 Michael Clark
>>>> + *
>>>> + * SPDX-License-Identifier: MIT
>>>
>>> Note that we expect contributions to be under GPL-2.0-or-later, unless
>>> derived from existing code that forces use of a different license, which
>>> needs to be explained in the commit message
>>
>> okay no problem, I can do that. there is a freestanding external origin:
>>
>> https://github.com/michaeljclark/x86
> 
> IIUC, that would only apply to the x86-core.c file - the other files
> tagged with MIT look like they were written just for QEMU inclusion.

there are two files that should stay MIT licensed:

- disas/x86.h
- disas/x86-core.c

# which bits are neutral

half of x86.h is neutral and represents an expression of C structures
and enumerations that map precisely to the structures and enumerations
in the Intel SDM for the core encoding. things like prefixes, ModRM,
SIB, VEX, EVEX, and the VEX maps which are general and would come
out the same had someone else transcribed them from the Intel SDM,
given that the enum values precisely map to the binary encoding.

# which bits are unique

on the other hand there is a rather unique compression for the opcode
encoding metadata related to densely packing the encoding in the Intel
CSV metadata, as well as a completely new LEX format which is unusual
in that it would have made sense for Intel to encode the metadata like
this in the first place, given some reflection on the VEX and EVEX
encodings. it took a surprising amount of time to do this: judging
from my home directory I started on this about 5 years ago in May 2020,
and I had several false starts where I completely discarded prior work.

- x86_enc_*, x86_opr_*, and x86_ord_* and x86_codec are unique and
   represent a very densely packed encoding of x86 codec metadata.

it was quite weird to write because I wrote no code for three months,
August to October 2024, just metadata. and I started from scratch
and completely threw out previous attempts which had included some
code from TCG. you can see that the emitter is radically different.

see x86-core.c:x86_codec_write

# what is QEMU-specific

the disassembly stub could change to GPL-2.0-or-later no problem:

- disas/x86-disas.c

# tangent on MIT licensed TCG headers

tangential to this, I have extracted TCG MIT headers from QEMU and
have a separate goal to write a new TCG compiler with the same API
but using this new x86 back-end. I have an unsent draft with some
licensing questions but I decided to just believe the MIT license.

I am choosing to use the interface portion for a new freestanding
TCG-workalike compiler. the Google LLC v. Oracle America Inc.
Supreme Court ruling on fair-use doctrine in relation to interface
header portions of existing works seems to make that plausible.

# tangent on instruction selection

there is an exhaustively complete encoding of AVX-512 that has been
fuzz tested against LLVM and it is small in comparison to capstone.
it could potentially be used as an EVEX emitter inside of QEMU.

but I don't have instruction selection yet. I note the metadata has
been de-duplicated compared to NASM. it does not use data from NASM
but I adopted a consistent coding scheme because NASM has been most
faithful to the Intel SDM metadata, which makes it very easy to add
new instructions because we can just copy-paste from the Intel SDM.

in this way LEX seems like something that should have been there
in the first place, because we don't have extraneous opcode bytes.
it ends up as 2-byte OPC+ModRM with masks, plus maps and prefixes,
either legacy or via VEX/EVEX. it makes the decoder very uniform.

for instruction selection I plan to do a combinatorial expansion
to generate enums mapping to subsets of the encodings for memory
or register operands, or other options like broadcast, more like
the denormalized NASM metadata which has thousands more entries
but auto-generated instead, and with type sizes or without for a
selection based on best fit. the enums on the right in the listings
below are work-in-progress from a new generator so that I can add
instruction selection. it can't be used as an emitter until we have
those enums, because at the moment the emitter requires the opcode
from decode to round trip rather than being populated by instruction
selection code.

# typed instruction selection enum expansions

   add rw,rw/mw                ADD_r32_r32
   add rw,rw/mw                ADD_r32_m32
   add rw,rw/mw                ADD_r64_r64
   add rw,rw/mw                ADD_r64_m64

   adc rw/mw,iw                ADC_r32_i32
   adc rw/mw,iw                ADC_m32_i32
   adc rw/mw,iw                ADC_r64_i32
   adc rw/mw,iw                ADC_m64_i32

   vxorps xmm,xmm,xmm/m128     VXORPS_v128_v128_v128
   vxorps xmm,xmm,xmm/m128     VXORPS_v128_v128_m128

# untyped instruction selection enum expansions

   add rw,rw/mw                ADD_rr
   add rw,rw/mw                ADD_rm
   adc rw/mw,iw                ADC_ri
   adc rw/mw,iw                ADC_mi

   vxorps xmm,xmm,xmm/m128     VXORPS_vvv
   vxorps xmm,xmm,xmm/m128     VXORPS_vvm

Michael.