diff options
Diffstat (limited to '')
-rw-r--r-- | tools/arch/x86/lib/inat.c | 2 | ||||
-rw-r--r-- | tools/arch/x86/lib/insn.c | 376 | ||||
-rw-r--r-- | tools/arch/x86/lib/memcpy_64.S | 64 | ||||
-rw-r--r-- | tools/arch/x86/lib/memset_64.S | 66 | ||||
-rw-r--r-- | tools/arch/x86/lib/x86-opcode-map.txt | 433 |
5 files changed, 636 insertions, 305 deletions
diff --git a/tools/arch/x86/lib/inat.c b/tools/arch/x86/lib/inat.c index 4f5ed49e1b4e..dfbcc6405941 100644 --- a/tools/arch/x86/lib/inat.c +++ b/tools/arch/x86/lib/inat.c @@ -4,7 +4,7 @@ * * Written by Masami Hiramatsu <mhiramat@redhat.com> */ -#include "../include/asm/insn.h" +#include "../include/asm/insn.h" /* __ignore_sync_check__ */ /* Attribute tables are generated from opcode map */ #include "inat-tables.c" diff --git a/tools/arch/x86/lib/insn.c b/tools/arch/x86/lib/insn.c index 0151dfc6da61..bce69c6bfa69 100644 --- a/tools/arch/x86/lib/insn.c +++ b/tools/arch/x86/lib/insn.c @@ -5,25 +5,43 @@ * Copyright (C) IBM Corporation, 2002, 2004, 2009 */ +#include <linux/kernel.h> #ifdef __KERNEL__ #include <linux/string.h> #else #include <string.h> #endif -#include "../include/asm/inat.h" -#include "../include/asm/insn.h" - -#include "../include/asm/emulate_prefix.h" +#include "../include/asm/inat.h" /* __ignore_sync_check__ */ +#include "../include/asm/insn.h" /* __ignore_sync_check__ */ +#include <linux/unaligned.h> /* __ignore_sync_check__ */ + +#include <linux/errno.h> +#include <linux/kconfig.h> + +#include "../include/asm/emulate_prefix.h" /* __ignore_sync_check__ */ + +#define leXX_to_cpu(t, r) \ +({ \ + __typeof__(t) v; \ + switch (sizeof(t)) { \ + case 4: v = le32_to_cpu(r); break; \ + case 2: v = le16_to_cpu(r); break; \ + case 1: v = r; break; \ + default: \ + BUILD_BUG(); break; \ + } \ + v; \ +}) /* Verify next sizeof(t) bytes can be on the same instruction */ #define validate_next(t, insn, n) \ ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr) #define __get_next(t, insn) \ - ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) + ({ t r = get_unaligned((t *)(insn)->next_byte); (insn)->next_byte += sizeof(t); leXX_to_cpu(t, r); }) #define __peek_nbyte_next(t, insn, n) \ - ({ t r = *(t*)((insn)->next_byte + n); r; }) + ({ t r = get_unaligned((t *)(insn)->next_byte + n); leXX_to_cpu(t, r); }) #define get_next(t, insn) \ ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); }) @@ -37,6 +55,7 @@ * insn_init() - initialize struct insn * @insn: &struct insn to be initialized * @kaddr: address (in kernel memory) of instruction (or copy thereof) + * @buf_len: length of the insn buffer at @kaddr * @x86_64: !0 for 64-bit kernel or 64-bit app */ void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64) @@ -52,7 +71,7 @@ void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64) insn->kaddr = kaddr; insn->end_kaddr = kaddr + buf_len; insn->next_byte = kaddr; - insn->x86_64 = x86_64 ? 1 : 0; + insn->x86_64 = x86_64; insn->opnd_bytes = 4; if (x86_64) insn->addr_bytes = 8; @@ -97,8 +116,12 @@ static void insn_get_emulate_prefix(struct insn *insn) * Populates the @insn->prefixes bitmap, and updates @insn->next_byte * to point to the (first) opcode. No effect if @insn->prefixes.got * is already set. + * + * * Returns: + * 0: on success + * < 0: on error */ -void insn_get_prefixes(struct insn *insn) +int insn_get_prefixes(struct insn *insn) { struct insn_field *prefixes = &insn->prefixes; insn_attr_t attr; @@ -106,7 +129,7 @@ void insn_get_prefixes(struct insn *insn) int i, nb; if (prefixes->got) - return; + return 0; insn_get_emulate_prefix(insn); @@ -147,9 +170,9 @@ found: b = insn->prefixes.bytes[3]; for (i = 0; i < nb; i++) if (prefixes->bytes[i] == lb) - prefixes->bytes[i] = b; + insn_set_byte(prefixes, i, b); } - insn->prefixes.bytes[3] = lb; + insn_set_byte(&insn->prefixes, 3, lb); } /* Decode REX prefix */ @@ -157,12 +180,22 @@ found: b = peek_next(insn_byte_t, insn); attr = inat_get_opcode_attribute(b); if (inat_is_rex_prefix(attr)) { - insn->rex_prefix.value = b; - insn->rex_prefix.nbytes = 1; + insn_field_set(&insn->rex_prefix, b, 1); insn->next_byte++; if (X86_REX_W(b)) /* REX.W overrides opnd_size */ insn->opnd_bytes = 8; + } else if (inat_is_rex2_prefix(attr)) { + insn_set_byte(&insn->rex_prefix, 0, b); + b = peek_nbyte_next(insn_byte_t, insn, 1); + insn_set_byte(&insn->rex_prefix, 1, b); + insn->rex_prefix.nbytes = 2; + insn->next_byte += 2; + if (X86_REX_W(b)) + /* REX.W overrides opnd_size */ + insn->opnd_bytes = 8; + insn->rex_prefix.got = 1; + goto vex_end; } } insn->rex_prefix.got = 1; @@ -181,13 +214,13 @@ found: if (X86_MODRM_MOD(b2) != 3) goto vex_end; } - insn->vex_prefix.bytes[0] = b; - insn->vex_prefix.bytes[1] = b2; + insn_set_byte(&insn->vex_prefix, 0, b); + insn_set_byte(&insn->vex_prefix, 1, b2); if (inat_is_evex_prefix(attr)) { b2 = peek_nbyte_next(insn_byte_t, insn, 2); - insn->vex_prefix.bytes[2] = b2; + insn_set_byte(&insn->vex_prefix, 2, b2); b2 = peek_nbyte_next(insn_byte_t, insn, 3); - insn->vex_prefix.bytes[3] = b2; + insn_set_byte(&insn->vex_prefix, 3, b2); insn->vex_prefix.nbytes = 4; insn->next_byte += 4; if (insn->x86_64 && X86_VEX_W(b2)) @@ -195,7 +228,7 @@ found: insn->opnd_bytes = 8; } else if (inat_is_vex3_prefix(attr)) { b2 = peek_nbyte_next(insn_byte_t, insn, 2); - insn->vex_prefix.bytes[2] = b2; + insn_set_byte(&insn->vex_prefix, 2, b2); insn->vex_prefix.nbytes = 3; insn->next_byte += 3; if (insn->x86_64 && X86_VEX_W(b2)) @@ -207,7 +240,7 @@ found: * Makes it easier to decode vex.W, vex.vvvv, * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0. */ - insn->vex_prefix.bytes[2] = b2 & 0x7f; + insn_set_byte(&insn->vex_prefix, 2, b2 & 0x7f); insn->vex_prefix.nbytes = 2; insn->next_byte += 2; } @@ -217,8 +250,10 @@ vex_end: prefixes->got = 1; + return 0; + err_out: - return; + return -ENODATA; } /** @@ -230,20 +265,27 @@ err_out: * If necessary, first collects any preceding (prefix) bytes. * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got * is already 1. + * + * Returns: + * 0: on success + * < 0: on error */ -void insn_get_opcode(struct insn *insn) +int insn_get_opcode(struct insn *insn) { struct insn_field *opcode = &insn->opcode; + int pfx_id, ret; insn_byte_t op; - int pfx_id; + if (opcode->got) - return; - if (!insn->prefixes.got) - insn_get_prefixes(insn); + return 0; + + ret = insn_get_prefixes(insn); + if (ret) + return ret; /* Get first opcode */ op = get_next(insn_byte_t, insn); - opcode->bytes[0] = op; + insn_set_byte(opcode, 0, op); opcode->nbytes = 1; /* Check if there is VEX prefix or not */ @@ -252,14 +294,41 @@ void insn_get_opcode(struct insn *insn) m = insn_vex_m_bits(insn); p = insn_vex_p_bits(insn); insn->attr = inat_get_avx_attribute(op, m, p); + /* SCALABLE EVEX uses p bits to encode operand size */ + if (inat_evex_scalable(insn->attr) && !insn_vex_w_bit(insn) && + p == INAT_PFX_OPNDSZ) + insn->opnd_bytes = 2; if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) || (!inat_accept_vex(insn->attr) && - !inat_is_group(insn->attr))) - insn->attr = 0; /* This instruction is bad */ - goto end; /* VEX has only 1 byte for opcode */ + !inat_is_group(insn->attr))) { + /* This instruction is bad */ + insn->attr = 0; + return -EINVAL; + } + /* VEX has only 1 byte for opcode */ + goto end; + } + + /* Check if there is REX2 prefix or not */ + if (insn_is_rex2(insn)) { + if (insn_rex2_m_bit(insn)) { + /* map 1 is escape 0x0f */ + insn_attr_t esc_attr = inat_get_opcode_attribute(0x0f); + + pfx_id = insn_last_prefix_id(insn); + insn->attr = inat_get_escape_attribute(op, pfx_id, esc_attr); + } else { + insn->attr = inat_get_opcode_attribute(op); + } + goto end; } insn->attr = inat_get_opcode_attribute(op); + if (insn->x86_64 && inat_is_invalid64(insn->attr)) { + /* This instruction is invalid, like UD2. Stop decoding. */ + insn->attr &= INAT_INV64; + } + while (inat_is_escape(insn->attr)) { /* Get escaped opcode */ op = get_next(insn_byte_t, insn); @@ -267,13 +336,19 @@ void insn_get_opcode(struct insn *insn) pfx_id = insn_last_prefix_id(insn); insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr); } - if (inat_must_vex(insn->attr)) - insn->attr = 0; /* This instruction is bad */ + + if (inat_must_vex(insn->attr)) { + /* This instruction is bad */ + insn->attr = 0; + return -EINVAL; + } + end: opcode->got = 1; + return 0; err_out: - return; + return -ENODATA; } /** @@ -283,35 +358,47 @@ err_out: * Populates @insn->modrm and updates @insn->next_byte to point past the * ModRM byte, if any. If necessary, first collects the preceding bytes * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. + * + * Returns: + * 0: on success + * < 0: on error */ -void insn_get_modrm(struct insn *insn) +int insn_get_modrm(struct insn *insn) { struct insn_field *modrm = &insn->modrm; insn_byte_t pfx_id, mod; + int ret; + if (modrm->got) - return; - if (!insn->opcode.got) - insn_get_opcode(insn); + return 0; + + ret = insn_get_opcode(insn); + if (ret) + return ret; if (inat_has_modrm(insn->attr)) { mod = get_next(insn_byte_t, insn); - modrm->value = mod; - modrm->nbytes = 1; + insn_field_set(modrm, mod, 1); if (inat_is_group(insn->attr)) { pfx_id = insn_last_prefix_id(insn); insn->attr = inat_get_group_attribute(mod, pfx_id, insn->attr); - if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) - insn->attr = 0; /* This is bad */ + if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) { + /* Bad insn */ + insn->attr = 0; + return -EINVAL; + } } } if (insn->x86_64 && inat_is_force64(insn->attr)) insn->opnd_bytes = 8; + modrm->got = 1; + return 0; err_out: - return; + return -ENODATA; } @@ -325,16 +412,19 @@ err_out: int insn_rip_relative(struct insn *insn) { struct insn_field *modrm = &insn->modrm; + int ret; if (!insn->x86_64) return 0; - if (!modrm->got) - insn_get_modrm(insn); + + ret = insn_get_modrm(insn); + if (ret) + return 0; /* * For rip-relative instructions, the mod field (top 2 bits) * is zero and the r/m field (bottom 3 bits) is 0x5. */ - return (modrm->nbytes && (modrm->value & 0xc7) == 0x5); + return (modrm->nbytes && (modrm->bytes[0] & 0xc7) == 0x5); } /** @@ -343,27 +433,37 @@ int insn_rip_relative(struct insn *insn) * * If necessary, first collects the instruction up to and including the * ModRM byte. + * + * Returns: + * 0: if decoding succeeded + * < 0: otherwise. */ -void insn_get_sib(struct insn *insn) +int insn_get_sib(struct insn *insn) { insn_byte_t modrm; + int ret; if (insn->sib.got) - return; - if (!insn->modrm.got) - insn_get_modrm(insn); + return 0; + + ret = insn_get_modrm(insn); + if (ret) + return ret; + if (insn->modrm.nbytes) { - modrm = (insn_byte_t)insn->modrm.value; + modrm = insn->modrm.bytes[0]; if (insn->addr_bytes != 2 && X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { - insn->sib.value = get_next(insn_byte_t, insn); - insn->sib.nbytes = 1; + insn_field_set(&insn->sib, + get_next(insn_byte_t, insn), 1); } } insn->sib.got = 1; + return 0; + err_out: - return; + return -ENODATA; } @@ -374,15 +474,23 @@ err_out: * If necessary, first collects the instruction up to and including the * SIB byte. * Displacement value is sign-expanded. + * + * * Returns: + * 0: if decoding succeeded + * < 0: otherwise. */ -void insn_get_displacement(struct insn *insn) +int insn_get_displacement(struct insn *insn) { insn_byte_t mod, rm, base; + int ret; if (insn->displacement.got) - return; - if (!insn->sib.got) - insn_get_sib(insn); + return 0; + + ret = insn_get_sib(insn); + if (ret) + return ret; + if (insn->modrm.nbytes) { /* * Interpreting the modrm byte: @@ -407,27 +515,27 @@ void insn_get_displacement(struct insn *insn) if (mod == 3) goto out; if (mod == 1) { - insn->displacement.value = get_next(signed char, insn); - insn->displacement.nbytes = 1; + insn_field_set(&insn->displacement, + get_next(signed char, insn), 1); } else if (insn->addr_bytes == 2) { if ((mod == 0 && rm == 6) || mod == 2) { - insn->displacement.value = - get_next(short, insn); - insn->displacement.nbytes = 2; + insn_field_set(&insn->displacement, + get_next(short, insn), 2); } } else { if ((mod == 0 && rm == 5) || mod == 2 || (mod == 0 && base == 5)) { - insn->displacement.value = get_next(int, insn); - insn->displacement.nbytes = 4; + insn_field_set(&insn->displacement, + get_next(int, insn), 4); } } } out: insn->displacement.got = 1; + return 0; err_out: - return; + return -ENODATA; } /* Decode moffset16/32/64. Return 0 if failed */ @@ -435,18 +543,14 @@ static int __get_moffset(struct insn *insn) { switch (insn->addr_bytes) { case 2: - insn->moffset1.value = get_next(short, insn); - insn->moffset1.nbytes = 2; + insn_field_set(&insn->moffset1, get_next(short, insn), 2); break; case 4: - insn->moffset1.value = get_next(int, insn); - insn->moffset1.nbytes = 4; + insn_field_set(&insn->moffset1, get_next(int, insn), 4); break; case 8: - insn->moffset1.value = get_next(int, insn); - insn->moffset1.nbytes = 4; - insn->moffset2.value = get_next(int, insn); - insn->moffset2.nbytes = 4; + insn_field_set(&insn->moffset1, get_next(int, insn), 4); + insn_field_set(&insn->moffset2, get_next(int, insn), 4); break; default: /* opnd_bytes must be modified manually */ goto err_out; @@ -464,13 +568,11 @@ static int __get_immv32(struct insn *insn) { switch (insn->opnd_bytes) { case 2: - insn->immediate.value = get_next(short, insn); - insn->immediate.nbytes = 2; + insn_field_set(&insn->immediate, get_next(short, insn), 2); break; case 4: case 8: - insn->immediate.value = get_next(int, insn); - insn->immediate.nbytes = 4; + insn_field_set(&insn->immediate, get_next(int, insn), 4); break; default: /* opnd_bytes must be modified manually */ goto err_out; @@ -487,18 +589,15 @@ static int __get_immv(struct insn *insn) { switch (insn->opnd_bytes) { case 2: - insn->immediate1.value = get_next(short, insn); - insn->immediate1.nbytes = 2; + insn_field_set(&insn->immediate1, get_next(short, insn), 2); break; case 4: - insn->immediate1.value = get_next(int, insn); + insn_field_set(&insn->immediate1, get_next(int, insn), 4); insn->immediate1.nbytes = 4; break; case 8: - insn->immediate1.value = get_next(int, insn); - insn->immediate1.nbytes = 4; - insn->immediate2.value = get_next(int, insn); - insn->immediate2.nbytes = 4; + insn_field_set(&insn->immediate1, get_next(int, insn), 4); + insn_field_set(&insn->immediate2, get_next(int, insn), 4); break; default: /* opnd_bytes must be modified manually */ goto err_out; @@ -515,12 +614,10 @@ static int __get_immptr(struct insn *insn) { switch (insn->opnd_bytes) { case 2: - insn->immediate1.value = get_next(short, insn); - insn->immediate1.nbytes = 2; + insn_field_set(&insn->immediate1, get_next(short, insn), 2); break; case 4: - insn->immediate1.value = get_next(int, insn); - insn->immediate1.nbytes = 4; + insn_field_set(&insn->immediate1, get_next(int, insn), 4); break; case 8: /* ptr16:64 is not exist (no segment) */ @@ -528,8 +625,7 @@ static int __get_immptr(struct insn *insn) default: /* opnd_bytes must be modified manually */ goto err_out; } - insn->immediate2.value = get_next(unsigned short, insn); - insn->immediate2.nbytes = 2; + insn_field_set(&insn->immediate2, get_next(unsigned short, insn), 2); insn->immediate1.got = insn->immediate2.got = 1; return 1; @@ -538,20 +634,28 @@ err_out: } /** - * insn_get_immediate() - Get the immediates of instruction + * insn_get_immediate() - Get the immediate in an instruction * @insn: &struct insn containing instruction * * If necessary, first collects the instruction up to and including the * displacement bytes. * Basically, most of immediates are sign-expanded. Unsigned-value can be - * get by bit masking with ((1 << (nbytes * 8)) - 1) + * computed by bit masking with ((1 << (nbytes * 8)) - 1) + * + * Returns: + * 0: on success + * < 0: on error */ -void insn_get_immediate(struct insn *insn) +int insn_get_immediate(struct insn *insn) { + int ret; + if (insn->immediate.got) - return; - if (!insn->displacement.got) - insn_get_displacement(insn); + return 0; + + ret = insn_get_displacement(insn); + if (ret) + return ret; if (inat_has_moffset(insn->attr)) { if (!__get_moffset(insn)) @@ -560,27 +664,21 @@ void insn_get_immediate(struct insn *insn) } if (!inat_has_immediate(insn->attr)) - /* no immediates */ goto done; switch (inat_immediate_size(insn->attr)) { case INAT_IMM_BYTE: - insn->immediate.value = get_next(signed char, insn); - insn->immediate.nbytes = 1; + insn_field_set(&insn->immediate, get_next(signed char, insn), 1); break; case INAT_IMM_WORD: - insn->immediate.value = get_next(short, insn); - insn->immediate.nbytes = 2; + insn_field_set(&insn->immediate, get_next(short, insn), 2); break; case INAT_IMM_DWORD: - insn->immediate.value = get_next(int, insn); - insn->immediate.nbytes = 4; + insn_field_set(&insn->immediate, get_next(int, insn), 4); break; case INAT_IMM_QWORD: - insn->immediate1.value = get_next(int, insn); - insn->immediate1.nbytes = 4; - insn->immediate2.value = get_next(int, insn); - insn->immediate2.nbytes = 4; + insn_field_set(&insn->immediate1, get_next(int, insn), 4); + insn_field_set(&insn->immediate2, get_next(int, insn), 4); break; case INAT_IMM_PTR: if (!__get_immptr(insn)) @@ -599,14 +697,14 @@ void insn_get_immediate(struct insn *insn) goto err_out; } if (inat_has_second_immediate(insn->attr)) { - insn->immediate2.value = get_next(signed char, insn); - insn->immediate2.nbytes = 1; + insn_field_set(&insn->immediate2, get_next(signed char, insn), 1); } done: insn->immediate.got = 1; + return 0; err_out: - return; + return -ENODATA; } /** @@ -615,13 +713,63 @@ err_out: * * If necessary, first collects the instruction up to and including the * immediates bytes. - */ -void insn_get_length(struct insn *insn) + * + * Returns: + * - 0 on success + * - < 0 on error +*/ +int insn_get_length(struct insn *insn) { + int ret; + if (insn->length) - return; - if (!insn->immediate.got) - insn_get_immediate(insn); + return 0; + + ret = insn_get_immediate(insn); + if (ret) + return ret; + insn->length = (unsigned char)((unsigned long)insn->next_byte - (unsigned long)insn->kaddr); + + return 0; +} + +/* Ensure this instruction is decoded completely */ +static inline int insn_complete(struct insn *insn) +{ + return insn->opcode.got && insn->modrm.got && insn->sib.got && + insn->displacement.got && insn->immediate.got; +} + +/** + * insn_decode() - Decode an x86 instruction + * @insn: &struct insn to be initialized + * @kaddr: address (in kernel memory) of instruction (or copy thereof) + * @buf_len: length of the insn buffer at @kaddr + * @m: insn mode, see enum insn_mode + * + * Returns: + * 0: if decoding succeeded + * < 0: otherwise. + */ +int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m) +{ + int ret; + +#define INSN_MODE_KERN (enum insn_mode)-1 /* __ignore_sync_check__ mode is only valid in the kernel */ + + if (m == INSN_MODE_KERN) + insn_init(insn, kaddr, buf_len, IS_ENABLED(CONFIG_X86_64)); + else + insn_init(insn, kaddr, buf_len, m == INSN_MODE_64); + + ret = insn_get_length(insn); + if (ret) + return ret; + + if (insn_complete(insn)) + return 0; + + return -EINVAL; } diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index 0b5b8ae56bd9..ccc3d923fc1e 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -1,22 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* Copyright 2002 Andi Kleen */ +#include <linux/export.h> #include <linux/linkage.h> #include <asm/errno.h> #include <asm/cpufeatures.h> -#include <asm/alternative-asm.h> -#include <asm/export.h> +#include <asm/alternative.h> -.pushsection .noinstr.text, "ax" - -/* - * We build a jump to memcpy_orig by default which gets NOPped out on - * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which - * have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs - * to a jmp to memcpy_erms which does the REP; MOVSB mem copy. - */ - -.weak memcpy +.section .noinstr.text, "ax" /* * memcpy - Copy a memory block. @@ -28,37 +19,31 @@ * * Output: * rax original destination + * + * The FSRM alternative should be done inline (avoiding the call and + * the disgusting return handling), but that would require some help + * from the compiler for better calling conventions. + * + * The 'rep movsb' itself is small enough to replace the call, but the + * two register moves blow up the code. And one of them is "needed" + * only for the return value that is the same as the source input, + * which the compiler could/should do much better anyway. */ -SYM_FUNC_START_ALIAS(__memcpy) -SYM_FUNC_START_LOCAL(memcpy) - ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ - "jmp memcpy_erms", X86_FEATURE_ERMS +SYM_TYPED_FUNC_START(__memcpy) + ALTERNATIVE "jmp memcpy_orig", "", X86_FEATURE_FSRM movq %rdi, %rax movq %rdx, %rcx - shrq $3, %rcx - andl $7, %edx - rep movsq - movl %edx, %ecx rep movsb - ret -SYM_FUNC_END(memcpy) -SYM_FUNC_END_ALIAS(__memcpy) -EXPORT_SYMBOL(memcpy) + RET +SYM_FUNC_END(__memcpy) EXPORT_SYMBOL(__memcpy) -/* - * memcpy_erms() - enhanced fast string memcpy. This is faster and - * simpler than memcpy. Use memcpy_erms when possible. - */ -SYM_FUNC_START(memcpy_erms) - movq %rdi, %rax - movq %rdx, %rcx - rep movsb - ret -SYM_FUNC_END(memcpy_erms) +SYM_FUNC_ALIAS_MEMFUNC(memcpy, __memcpy) +SYM_PIC_ALIAS(memcpy) +EXPORT_SYMBOL(memcpy) -SYM_FUNC_START(memcpy_orig) +SYM_FUNC_START_LOCAL(memcpy_orig) movq %rdi, %rax cmpq $0x20, %rdx @@ -139,7 +124,7 @@ SYM_FUNC_START(memcpy_orig) movq %r9, 1*8(%rdi) movq %r10, -2*8(%rdi, %rdx) movq %r11, -1*8(%rdi, %rdx) - retq + RET .p2align 4 .Lless_16bytes: cmpl $8, %edx @@ -151,7 +136,7 @@ SYM_FUNC_START(memcpy_orig) movq -1*8(%rsi, %rdx), %r9 movq %r8, 0*8(%rdi) movq %r9, -1*8(%rdi, %rdx) - retq + RET .p2align 4 .Lless_8bytes: cmpl $4, %edx @@ -164,7 +149,7 @@ SYM_FUNC_START(memcpy_orig) movl -4(%rsi, %rdx), %r8d movl %ecx, (%rdi) movl %r8d, -4(%rdi, %rdx) - retq + RET .p2align 4 .Lless_3bytes: subl $1, %edx @@ -182,7 +167,6 @@ SYM_FUNC_START(memcpy_orig) movb %cl, (%rdi) .Lend: - retq + RET SYM_FUNC_END(memcpy_orig) -.popsection diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S index fd5d25a474b7..fb5a03cf5ab7 100644 --- a/tools/arch/x86/lib/memset_64.S +++ b/tools/arch/x86/lib/memset_64.S @@ -1,11 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright 2002 Andi Kleen, SuSE Labs */ +#include <linux/export.h> #include <linux/linkage.h> +#include <linux/cfi_types.h> #include <asm/cpufeatures.h> -#include <asm/alternative-asm.h> +#include <asm/alternative.h> -.weak memset +.section .noinstr.text, "ax" /* * ISO C memset - set a memory block to a byte value. This function uses fast @@ -17,55 +19,33 @@ * rdx count (bytes) * * rax original destination - */ -SYM_FUNC_START_ALIAS(memset) -SYM_FUNC_START(__memset) - /* - * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended - * to use it when possible. If not available, use fast string instructions. - * - * Otherwise, use original memset function. - */ - ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \ - "jmp memset_erms", X86_FEATURE_ERMS - - movq %rdi,%r9 - movq %rdx,%rcx - andl $7,%edx - shrq $3,%rcx - /* expand byte value */ - movzbl %sil,%esi - movabs $0x0101010101010101,%rax - imulq %rsi,%rax - rep stosq - movl %edx,%ecx - rep stosb - movq %r9,%rax - ret -SYM_FUNC_END(__memset) -SYM_FUNC_END_ALIAS(memset) - -/* - * ISO C memset - set a memory block to a byte value. This function uses - * enhanced rep stosb to override the fast string function. - * The code is simpler and shorter than the fast string function as well. * - * rdi destination - * rsi value (char) - * rdx count (bytes) + * The FSRS alternative should be done inline (avoiding the call and + * the disgusting return handling), but that would require some help + * from the compiler for better calling conventions. * - * rax original destination + * The 'rep stosb' itself is small enough to replace the call, but all + * the register moves blow up the code. And two of them are "needed" + * only for the return value that is the same as the source input, + * which the compiler could/should do much better anyway. */ -SYM_FUNC_START(memset_erms) +SYM_TYPED_FUNC_START(__memset) + ALTERNATIVE "jmp memset_orig", "", X86_FEATURE_FSRS + movq %rdi,%r9 movb %sil,%al movq %rdx,%rcx rep stosb movq %r9,%rax - ret -SYM_FUNC_END(memset_erms) + RET +SYM_FUNC_END(__memset) +EXPORT_SYMBOL(__memset) + +SYM_FUNC_ALIAS_MEMFUNC(memset, __memset) +SYM_PIC_ALIAS(memset) +EXPORT_SYMBOL(memset) -SYM_FUNC_START(memset_orig) +SYM_FUNC_START_LOCAL(memset_orig) movq %rdi,%r10 /* expand byte value */ @@ -124,7 +104,7 @@ SYM_FUNC_START(memset_orig) .Lende: movq %r10,%rax - ret + RET .Lbad_alignment: cmpq $7,%rdx diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt index ec31f5b60323..262f7ca1fb95 100644 --- a/tools/arch/x86/lib/x86-opcode-map.txt +++ b/tools/arch/x86/lib/x86-opcode-map.txt @@ -23,6 +23,7 @@ # # AVX Superscripts # (ev): this opcode requires EVEX prefix. +# (es): this opcode requires EVEX prefix and is SCALABALE. # (evo): this opcode is changed by EVEX prefix (EVEX opcode) # (v): this opcode requires VEX prefix. # (v1): this opcode only supports 128bit VEX. @@ -33,6 +34,10 @@ # - (F2): the last prefix is 0xF2 # - (!F3) : the last prefix is not 0xF3 (including non-last prefix case) # - (66&F2): Both 0x66 and 0xF2 prefixes are specified. +# +# REX2 Prefix Superscripts +# - (!REX2): REX2 is not allowed +# - (REX2): REX2 variant e.g. JMPABS Table: one byte opcode Referrer: @@ -142,13 +147,13 @@ AVXcode: # 0x60 - 0x6f 60: PUSHA/PUSHAD (i64) 61: POPA/POPAD (i64) -62: BOUND Gv,Ma (i64) | EVEX (Prefix) +62: BOUND Gv,Ma (i64) | EVEX (Prefix),(o64) 63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64) 64: SEG=FS (Prefix) 65: SEG=GS (Prefix) 66: Operand-Size (Prefix) 67: Address-Size (Prefix) -68: PUSH Iz (d64) +68: PUSH Iz 69: IMUL Gv,Ev,Iz 6a: PUSH Ib (d64) 6b: IMUL Gv,Ev,Ib @@ -157,22 +162,22 @@ AVXcode: 6e: OUTS/OUTSB DX,Xb 6f: OUTS/OUTSW/OUTSD DX,Xz # 0x70 - 0x7f -70: JO Jb -71: JNO Jb -72: JB/JNAE/JC Jb -73: JNB/JAE/JNC Jb -74: JZ/JE Jb -75: JNZ/JNE Jb -76: JBE/JNA Jb -77: JNBE/JA Jb -78: JS Jb -79: JNS Jb -7a: JP/JPE Jb -7b: JNP/JPO Jb -7c: JL/JNGE Jb -7d: JNL/JGE Jb -7e: JLE/JNG Jb -7f: JNLE/JG Jb +70: JO Jb (!REX2) +71: JNO Jb (!REX2) +72: JB/JNAE/JC Jb (!REX2) +73: JNB/JAE/JNC Jb (!REX2) +74: JZ/JE Jb (!REX2) +75: JNZ/JNE Jb (!REX2) +76: JBE/JNA Jb (!REX2) +77: JNBE/JA Jb (!REX2) +78: JS Jb (!REX2) +79: JNS Jb (!REX2) +7a: JP/JPE Jb (!REX2) +7b: JNP/JPO Jb (!REX2) +7c: JL/JNGE Jb (!REX2) +7d: JNL/JGE Jb (!REX2) +7e: JLE/JNG Jb (!REX2) +7f: JNLE/JG Jb (!REX2) # 0x80 - 0x8f 80: Grp1 Eb,Ib (1A) 81: Grp1 Ev,Iz (1A) @@ -208,24 +213,24 @@ AVXcode: 9e: SAHF 9f: LAHF # 0xa0 - 0xaf -a0: MOV AL,Ob -a1: MOV rAX,Ov -a2: MOV Ob,AL -a3: MOV Ov,rAX -a4: MOVS/B Yb,Xb -a5: MOVS/W/D/Q Yv,Xv -a6: CMPS/B Xb,Yb -a7: CMPS/W/D Xv,Yv -a8: TEST AL,Ib -a9: TEST rAX,Iz -aa: STOS/B Yb,AL -ab: STOS/W/D/Q Yv,rAX -ac: LODS/B AL,Xb -ad: LODS/W/D/Q rAX,Xv -ae: SCAS/B AL,Yb +a0: MOV AL,Ob (!REX2) +a1: MOV rAX,Ov (!REX2) | JMPABS O (REX2),(o64) +a2: MOV Ob,AL (!REX2) +a3: MOV Ov,rAX (!REX2) +a4: MOVS/B Yb,Xb (!REX2) +a5: MOVS/W/D/Q Yv,Xv (!REX2) +a6: CMPS/B Xb,Yb (!REX2) +a7: CMPS/W/D Xv,Yv (!REX2) +a8: TEST AL,Ib (!REX2) +a9: TEST rAX,Iz (!REX2) +aa: STOS/B Yb,AL (!REX2) +ab: STOS/W/D/Q Yv,rAX (!REX2) +ac: LODS/B AL,Xb (!REX2) +ad: LODS/W/D/Q rAX,Xv (!REX2) +ae: SCAS/B AL,Yb (!REX2) # Note: The May 2011 Intel manual shows Xv for the second parameter of the # next instruction but Yv is correct -af: SCAS/W/D/Q rAX,Yv +af: SCAS/W/D/Q rAX,Yv (!REX2) # 0xb0 - 0xbf b0: MOV AL/R8L,Ib b1: MOV CL/R9L,Ib @@ -248,8 +253,8 @@ c0: Grp2 Eb,Ib (1A) c1: Grp2 Ev,Ib (1A) c2: RETN Iw (f64) c3: RETN -c4: LES Gz,Mp (i64) | VEX+2byte (Prefix) -c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix) +c4: LES Gz,Mp (i64) | VEX+2byte (Prefix),(o64) +c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix),(o64) c6: Grp11A Eb,Ib (1A) c7: Grp11B Ev,Iz (1A) c8: ENTER Iw,Ib @@ -266,7 +271,7 @@ d1: Grp2 Ev,1 (1A) d2: Grp2 Eb,CL (1A) d3: Grp2 Ev,CL (1A) d4: AAM Ib (i64) -d5: AAD Ib (i64) +d5: AAD Ib (i64) | REX2 (Prefix),(o64) d6: d7: XLAT/XLATB d8: ESC @@ -281,26 +286,26 @@ df: ESC # Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix # in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation # to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD. -e0: LOOPNE/LOOPNZ Jb (f64) -e1: LOOPE/LOOPZ Jb (f64) -e2: LOOP Jb (f64) -e3: JrCXZ Jb (f64) -e4: IN AL,Ib -e5: IN eAX,Ib -e6: OUT Ib,AL -e7: OUT Ib,eAX +e0: LOOPNE/LOOPNZ Jb (f64),(!REX2) +e1: LOOPE/LOOPZ Jb (f64),(!REX2) +e2: LOOP Jb (f64),(!REX2) +e3: JrCXZ Jb (f64),(!REX2) +e4: IN AL,Ib (!REX2) +e5: IN eAX,Ib (!REX2) +e6: OUT Ib,AL (!REX2) +e7: OUT Ib,eAX (!REX2) # With 0x66 prefix in 64-bit mode, for AMD CPUs immediate offset # in "near" jumps and calls is 16-bit. For CALL, # push of return address is 16-bit wide, RSP is decremented by 2 # but is not truncated to 16 bits, unlike RIP. -e8: CALL Jz (f64) -e9: JMP-near Jz (f64) -ea: JMP-far Ap (i64) -eb: JMP-short Jb (f64) -ec: IN AL,DX -ed: IN eAX,DX -ee: OUT DX,AL -ef: OUT DX,eAX +e8: CALL Jz (f64),(!REX2) +e9: JMP-near Jz (f64),(!REX2) +ea: JMP-far Ap (i64),(!REX2) +eb: JMP-short Jb (f64),(!REX2) +ec: IN AL,DX (!REX2) +ed: IN eAX,DX (!REX2) +ee: OUT DX,AL (!REX2) +ef: OUT DX,eAX (!REX2) # 0xf0 - 0xff f0: LOCK (Prefix) f1: @@ -386,14 +391,14 @@ AVXcode: 1 2e: vucomiss Vss,Wss (v1) | vucomisd Vsd,Wsd (66),(v1) 2f: vcomiss Vss,Wss (v1) | vcomisd Vsd,Wsd (66),(v1) # 0x0f 0x30-0x3f -30: WRMSR -31: RDTSC -32: RDMSR -33: RDPMC -34: SYSENTER -35: SYSEXIT +30: WRMSR (!REX2) +31: RDTSC (!REX2) +32: RDMSR (!REX2) +33: RDPMC (!REX2) +34: SYSENTER (!REX2) +35: SYSEXIT (!REX2) 36: -37: GETSEC +37: GETSEC (!REX2) 38: escape # 3-byte escape 1 39: 3a: escape # 3-byte escape 2 @@ -473,22 +478,22 @@ AVXcode: 1 7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqa32/64 Wx,Vx (66),(evo) | vmovdqu Wx,Vx (F3) | vmovdqu32/64 Wx,Vx (F3),(evo) | vmovdqu8/16 Wx,Vx (F2),(ev) # 0x0f 0x80-0x8f # Note: "forced64" is Intel CPU behavior (see comment about CALL insn). -80: JO Jz (f64) -81: JNO Jz (f64) -82: JB/JC/JNAE Jz (f64) -83: JAE/JNB/JNC Jz (f64) -84: JE/JZ Jz (f64) -85: JNE/JNZ Jz (f64) -86: JBE/JNA Jz (f64) -87: JA/JNBE Jz (f64) -88: JS Jz (f64) -89: JNS Jz (f64) -8a: JP/JPE Jz (f64) -8b: JNP/JPO Jz (f64) -8c: JL/JNGE Jz (f64) -8d: JNL/JGE Jz (f64) -8e: JLE/JNG Jz (f64) -8f: JNLE/JG Jz (f64) +80: JO Jz (f64),(!REX2) +81: JNO Jz (f64),(!REX2) +82: JB/JC/JNAE Jz (f64),(!REX2) +83: JAE/JNB/JNC Jz (f64),(!REX2) +84: JE/JZ Jz (f64),(!REX2) +85: JNE/JNZ Jz (f64),(!REX2) +86: JBE/JNA Jz (f64),(!REX2) +87: JA/JNBE Jz (f64),(!REX2) +88: JS Jz (f64),(!REX2) +89: JNS Jz (f64),(!REX2) +8a: JP/JPE Jz (f64),(!REX2) +8b: JNP/JPO Jz (f64),(!REX2) +8c: JL/JNGE Jz (f64),(!REX2) +8d: JNL/JGE Jz (f64),(!REX2) +8e: JLE/JNG Jz (f64),(!REX2) +8f: JNLE/JG Jz (f64),(!REX2) # 0x0f 0x90-0x9f 90: SETO Eb | kmovw/q Vk,Wk | kmovb/d Vk,Wk (66) 91: SETNO Eb | kmovw/q Mv,Vk | kmovb/d Mv,Vk (66) @@ -690,32 +695,40 @@ AVXcode: 2 45: vpsrlvd/q Vx,Hx,Wx (66),(v) 46: vpsravd Vx,Hx,Wx (66),(v) | vpsravd/q Vx,Hx,Wx (66),(evo) 47: vpsllvd/q Vx,Hx,Wx (66),(v) -# Skip 0x48-0x4b +# Skip 0x48 +49: TILERELEASE (v1),(000),(11B) | LDTILECFG Mtc (v1)(000) | STTILECFG Mtc (66),(v1),(000) | TILEZERO Vt (F2),(v1),(11B) +# Skip 0x4a +4b: TILELOADD Vt,Wsm (F2),(v1) | TILELOADDT1 Vt,Wsm (66),(v1) | TILESTORED Wsm,Vt (F3),(v) 4c: vrcp14ps/d Vpd,Wpd (66),(ev) 4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev) 4e: vrsqrt14ps/d Vpd,Wpd (66),(ev) 4f: vrsqrt14ss/d Vsd,Hsd,Wsd (66),(ev) -50: vpdpbusd Vx,Hx,Wx (66),(ev) -51: vpdpbusds Vx,Hx,Wx (66),(ev) -52: vdpbf16ps Vx,Hx,Wx (F3),(ev) | vpdpwssd Vx,Hx,Wx (66),(ev) | vp4dpwssd Vdqq,Hdqq,Wdq (F2),(ev) -53: vpdpwssds Vx,Hx,Wx (66),(ev) | vp4dpwssds Vdqq,Hdqq,Wdq (F2),(ev) +50: vpdpbusd Vx,Hx,Wx (66) | vpdpbssd Vx,Hx,Wx (F2),(v) | vpdpbsud Vx,Hx,Wx (F3),(v) | vpdpbuud Vx,Hx,Wx (v) +51: vpdpbusds Vx,Hx,Wx (66) | vpdpbssds Vx,Hx,Wx (F2),(v) | vpdpbsuds Vx,Hx,Wx (F3),(v) | vpdpbuuds Vx,Hx,Wx (v) +52: vdpbf16ps Vx,Hx,Wx (F3),(ev) | vpdpwssd Vx,Hx,Wx (66) | vp4dpwssd Vdqq,Hdqq,Wdq (F2),(ev) +53: vpdpwssds Vx,Hx,Wx (66) | vp4dpwssds Vdqq,Hdqq,Wdq (F2),(ev) 54: vpopcntb/w Vx,Wx (66),(ev) 55: vpopcntd/q Vx,Wx (66),(ev) 58: vpbroadcastd Vx,Wx (66),(v) 59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo) 5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo) 5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev) -# Skip 0x5c-0x61 +5c: TDPBF16PS Vt,Wt,Ht (F3),(v1) | TDPFP16PS Vt,Wt,Ht (F2),(v1),(o64) +# Skip 0x5d +5e: TDPBSSD Vt,Wt,Ht (F2),(v1) | TDPBSUD Vt,Wt,Ht (F3),(v1) | TDPBUSD Vt,Wt,Ht (66),(v1) | TDPBUUD Vt,Wt,Ht (v1) +# Skip 0x5f-0x61 62: vpexpandb/w Vx,Wx (66),(ev) 63: vpcompressb/w Wx,Vx (66),(ev) 64: vpblendmd/q Vx,Hx,Wx (66),(ev) 65: vblendmps/d Vx,Hx,Wx (66),(ev) 66: vpblendmb/w Vx,Hx,Wx (66),(ev) 68: vp2intersectd/q Kx,Hx,Wx (F2),(ev) -# Skip 0x69-0x6f +# Skip 0x69-0x6b +6c: TCMMIMFP16PS Vt,Wt,Ht (66),(v1),(o64) | TCMMRLFP16PS Vt,Wt,Ht (v1),(o64) +# Skip 0x6d-0x6f 70: vpshldvw Vx,Hx,Wx (66),(ev) 71: vpshldvd/q Vx,Hx,Wx (66),(ev) -72: vcvtne2ps2bf16 Vx,Hx,Wx (F2),(ev) | vcvtneps2bf16 Vx,Wx (F3),(ev) | vpshrdvw Vx,Hx,Wx (66),(ev) +72: vcvtne2ps2bf16 Vx,Hx,Wx (F2),(ev) | vcvtneps2bf16 Vx,Wx (F3) | vpshrdvw Vx,Hx,Wx (66),(ev) 73: vpshrdvd/q Vx,Hx,Wx (66),(ev) 75: vpermi2b/w Vx,Hx,Wx (66),(ev) 76: vpermi2d/q Vx,Hx,Wx (66),(ev) @@ -771,8 +784,10 @@ ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v) ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1) ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v) af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1) -b4: vpmadd52luq Vx,Hx,Wx (66),(ev) -b5: vpmadd52huq Vx,Hx,Wx (66),(ev) +b0: vcvtneebf162ps Vx,Mx (F3),(!11B),(v) | vcvtneeph2ps Vx,Mx (66),(!11B),(v) | vcvtneobf162ps Vx,Mx (F2),(!11B),(v) | vcvtneoph2ps Vx,Mx (!11B),(v) +b1: vbcstnebf162ps Vx,Mw (F3),(!11B),(v) | vbcstnesh2ps Vx,Mw (66),(!11B),(v) +b4: vpmadd52luq Vx,Hx,Wx (66) +b5: vpmadd52huq Vx,Hx,Wx (66) b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v) b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v) b8: vfmadd231ps/d Vx,Hx,Wx (66),(v) @@ -790,15 +805,35 @@ c7: Grp19 (1A) c8: sha1nexte Vdq,Wdq | vexp2ps/d Vx,Wx (66),(ev) c9: sha1msg1 Vdq,Wdq ca: sha1msg2 Vdq,Wdq | vrcp28ps/d Vx,Wx (66),(ev) -cb: sha256rnds2 Vdq,Wdq | vrcp28ss/d Vx,Hx,Wx (66),(ev) -cc: sha256msg1 Vdq,Wdq | vrsqrt28ps/d Vx,Wx (66),(ev) -cd: sha256msg2 Vdq,Wdq | vrsqrt28ss/d Vx,Hx,Wx (66),(ev) +cb: sha256rnds2 Vdq,Wdq | vrcp28ss/d Vx,Hx,Wx (66),(ev) | vsha512rnds2 Vqq,Hqq,Udq (F2),(11B),(v) +cc: sha256msg1 Vdq,Wdq | vrsqrt28ps/d Vx,Wx (66),(ev) | vsha512msg1 Vqq,Udq (F2),(11B),(v) +cd: sha256msg2 Vdq,Wdq | vrsqrt28ss/d Vx,Hx,Wx (66),(ev) | vsha512msg2 Vqq,Uqq (F2),(11B),(v) cf: vgf2p8mulb Vx,Wx (66) +d2: vpdpwsud Vx,Hx,Wx (F3),(v) | vpdpwusd Vx,Hx,Wx (66),(v) | vpdpwuud Vx,Hx,Wx (v) +d3: vpdpwsuds Vx,Hx,Wx (F3),(v) | vpdpwusds Vx,Hx,Wx (66),(v) | vpdpwuuds Vx,Hx,Wx (v) +d8: AESENCWIDE128KL Qpi (F3),(000),(00B) | AESENCWIDE256KL Qpi (F3),(000),(10B) | AESDECWIDE128KL Qpi (F3),(000),(01B) | AESDECWIDE256KL Qpi (F3),(000),(11B) +da: vsm3msg1 Vdq,Hdq,Udq (v1) | vsm3msg2 Vdq,Hdq,Udq (66),(v1) | vsm4key4 Vx,Hx,Wx (F3),(v) | vsm4rnds4 Vx,Hx,Wx (F2),(v) db: VAESIMC Vdq,Wdq (66),(v1) -dc: vaesenc Vx,Hx,Wx (66) -dd: vaesenclast Vx,Hx,Wx (66) -de: vaesdec Vx,Hx,Wx (66) -df: vaesdeclast Vx,Hx,Wx (66) +dc: vaesenc Vx,Hx,Wx (66) | LOADIWKEY Vx,Hx (F3) | AESENC128KL Vpd,Qpi (F3) +dd: vaesenclast Vx,Hx,Wx (66) | AESDEC128KL Vpd,Qpi (F3) +de: vaesdec Vx,Hx,Wx (66) | AESENC256KL Vpd,Qpi (F3) +df: vaesdeclast Vx,Hx,Wx (66) | AESDEC256KL Vpd,Qpi (F3) +e0: CMPOXADD My,Gy,By (66),(v1),(o64) +e1: CMPNOXADD My,Gy,By (66),(v1),(o64) +e2: CMPBXADD My,Gy,By (66),(v1),(o64) +e3: CMPNBXADD My,Gy,By (66),(v1),(o64) +e4: CMPZXADD My,Gy,By (66),(v1),(o64) +e5: CMPNZXADD My,Gy,By (66),(v1),(o64) +e6: CMPBEXADD My,Gy,By (66),(v1),(o64) +e7: CMPNBEXADD My,Gy,By (66),(v1),(o64) +e8: CMPSXADD My,Gy,By (66),(v1),(o64) +e9: CMPNSXADD My,Gy,By (66),(v1),(o64) +ea: CMPPXADD My,Gy,By (66),(v1),(o64) +eb: CMPNPXADD My,Gy,By (66),(v1),(o64) +ec: CMPLXADD My,Gy,By (66),(v1),(o64) +ed: CMPNLXADD My,Gy,By (66),(v1),(o64) +ee: CMPLEXADD My,Gy,By (66),(v1),(o64) +ef: CMPNLEXADD My,Gy,By (66),(v1),(o64) f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2) f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2) f2: ANDN Gy,By,Ey (v) @@ -806,8 +841,11 @@ f3: Grp17 (1A) f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) | WRUSSD/Q My,Gy (66) f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) | WRSSD/Q My,Gy f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) -f8: MOVDIR64B Gv,Mdqq (66) | ENQCMD Gv,Mdqq (F2) | ENQCMDS Gv,Mdqq (F3) +f8: MOVDIR64B Gv,Mdqq (66) | ENQCMD Gv,Mdqq (F2) | ENQCMDS Gv,Mdqq (F3) | URDMSR Rq,Gq (F2),(11B) | UWRMSR Gq,Rq (F3),(11B) f9: MOVDIRI My,Gy +fa: ENCODEKEY128 Ew,Ew (F3) +fb: ENCODEKEY256 Ew,Ew (F3) +fc: AADD My,Gy | AAND My,Gy (66) | AOR My,Gy (F2) | AXOR My,Gy (F3) EndTable Table: 3-byte opcode 2 (0x0f 0x3a) @@ -822,9 +860,9 @@ AVXcode: 3 05: vpermilpd Vx,Wx,Ib (66),(v) 06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v) 07: -08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo) +08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo) | vrndscaleph Vx,Wx,Ib (evo) 09: vroundpd Vx,Wx,Ib (66) | vrndscalepd Vx,Wx,Ib (66),(evo) -0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo) +0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo) | vrndscalesh Vx,Hx,Wx,Ib (evo) 0b: vroundsd Vsd,Wsd,Ib (66),(v1) | vrndscalesd Vx,Hx,Wx,Ib (66),(evo) 0c: vblendps Vx,Hx,Wx,Ib (66) 0d: vblendpd Vx,Hx,Wx,Ib (66) @@ -846,8 +884,8 @@ AVXcode: 3 22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1) 23: vshuff32x4/64x2 Vx,Hx,Wx,Ib (66),(ev) 25: vpternlogd/q Vx,Hx,Wx,Ib (66),(ev) -26: vgetmantps/d Vx,Wx,Ib (66),(ev) -27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev) +26: vgetmantps/d Vx,Wx,Ib (66),(ev) | vgetmantph Vx,Wx,Ib (ev) +27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev) | vgetmantsh Vx,Hx,Wx,Ib (ev) 30: kshiftrb/w Vk,Uk,Ib (66),(v) 31: kshiftrd/q Vk,Uk,Ib (66),(v) 32: kshiftlb/w Vk,Uk,Ib (66),(v) @@ -871,23 +909,201 @@ AVXcode: 3 51: vrangess/d Vx,Hx,Wx,Ib (66),(ev) 54: vfixupimmps/d Vx,Hx,Wx,Ib (66),(ev) 55: vfixupimmss/d Vx,Hx,Wx,Ib (66),(ev) -56: vreduceps/d Vx,Wx,Ib (66),(ev) -57: vreducess/d Vx,Hx,Wx,Ib (66),(ev) +56: vreduceps/d Vx,Wx,Ib (66),(ev) | vreduceph Vx,Wx,Ib (ev) +57: vreducess/d Vx,Hx,Wx,Ib (66),(ev) | vreducesh Vx,Hx,Wx,Ib (ev) 60: vpcmpestrm Vdq,Wdq,Ib (66),(v1) 61: vpcmpestri Vdq,Wdq,Ib (66),(v1) 62: vpcmpistrm Vdq,Wdq,Ib (66),(v1) 63: vpcmpistri Vdq,Wdq,Ib (66),(v1) -66: vfpclassps/d Vk,Wx,Ib (66),(ev) -67: vfpclassss/d Vk,Wx,Ib (66),(ev) +66: vfpclassps/d Vk,Wx,Ib (66),(ev) | vfpclassph Vx,Wx,Ib (ev) +67: vfpclassss/d Vk,Wx,Ib (66),(ev) | vfpclasssh Vx,Wx,Ib (ev) 70: vpshldw Vx,Hx,Wx,Ib (66),(ev) 71: vpshldd/q Vx,Hx,Wx,Ib (66),(ev) 72: vpshrdw Vx,Hx,Wx,Ib (66),(ev) 73: vpshrdd/q Vx,Hx,Wx,Ib (66),(ev) +c2: vcmpph Vx,Hx,Wx,Ib (ev) | vcmpsh Vx,Hx,Wx,Ib (F3),(ev) cc: sha1rnds4 Vdq,Wdq,Ib ce: vgf2p8affineqb Vx,Wx,Ib (66) cf: vgf2p8affineinvqb Vx,Wx,Ib (66) +de: vsm3rnds2 Vdq,Hdq,Wdq,Ib (66),(v1) df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) -f0: RORX Gy,Ey,Ib (F2),(v) +f0: RORX Gy,Ey,Ib (F2),(v) | HRESET Gv,Ib (F3),(000),(11B) +EndTable + +Table: EVEX map 4 +Referrer: +AVXcode: 4 +00: ADD Eb,Gb (ev) +01: ADD Ev,Gv (es) | ADD Ev,Gv (66),(es) +02: ADD Gb,Eb (ev) +03: ADD Gv,Ev (es) | ADD Gv,Ev (66),(es) +08: OR Eb,Gb (ev) +09: OR Ev,Gv (es) | OR Ev,Gv (66),(es) +0a: OR Gb,Eb (ev) +0b: OR Gv,Ev (es) | OR Gv,Ev (66),(es) +10: ADC Eb,Gb (ev) +11: ADC Ev,Gv (es) | ADC Ev,Gv (66),(es) +12: ADC Gb,Eb (ev) +13: ADC Gv,Ev (es) | ADC Gv,Ev (66),(es) +18: SBB Eb,Gb (ev) +19: SBB Ev,Gv (es) | SBB Ev,Gv (66),(es) +1a: SBB Gb,Eb (ev) +1b: SBB Gv,Ev (es) | SBB Gv,Ev (66),(es) +20: AND Eb,Gb (ev) +21: AND Ev,Gv (es) | AND Ev,Gv (66),(es) +22: AND Gb,Eb (ev) +23: AND Gv,Ev (es) | AND Gv,Ev (66),(es) +24: SHLD Ev,Gv,Ib (es) | SHLD Ev,Gv,Ib (66),(es) +28: SUB Eb,Gb (ev) +29: SUB Ev,Gv (es) | SUB Ev,Gv (66),(es) +2a: SUB Gb,Eb (ev) +2b: SUB Gv,Ev (es) | SUB Gv,Ev (66),(es) +2c: SHRD Ev,Gv,Ib (es) | SHRD Ev,Gv,Ib (66),(es) +30: XOR Eb,Gb (ev) +31: XOR Ev,Gv (es) | XOR Ev,Gv (66),(es) +32: XOR Gb,Eb (ev) +33: XOR Gv,Ev (es) | XOR Gv,Ev (66),(es) +# CCMPSCC instructions are: CCOMB, CCOMBE, CCOMF, CCOML, CCOMLE, CCOMNB, CCOMNBE, CCOMNL, CCOMNLE, +# CCOMNO, CCOMNS, CCOMNZ, CCOMO, CCOMS, CCOMT, CCOMZ +38: CCMPSCC Eb,Gb (ev) +39: CCMPSCC Ev,Gv (es) | CCMPSCC Ev,Gv (66),(es) +3a: CCMPSCC Gv,Ev (ev) +3b: CCMPSCC Gv,Ev (es) | CCMPSCC Gv,Ev (66),(es) +40: CMOVO Gv,Ev (es) | CMOVO Gv,Ev (66),(es) | CFCMOVO Ev,Ev (es) | CFCMOVO Ev,Ev (66),(es) | SETO Eb (F2),(ev) +41: CMOVNO Gv,Ev (es) | CMOVNO Gv,Ev (66),(es) | CFCMOVNO Ev,Ev (es) | CFCMOVNO Ev,Ev (66),(es) | SETNO Eb (F2),(ev) +42: CMOVB Gv,Ev (es) | CMOVB Gv,Ev (66),(es) | CFCMOVB Ev,Ev (es) | CFCMOVB Ev,Ev (66),(es) | SETB Eb (F2),(ev) +43: CMOVNB Gv,Ev (es) | CMOVNB Gv,Ev (66),(es) | CFCMOVNB Ev,Ev (es) | CFCMOVNB Ev,Ev (66),(es) | SETNB Eb (F2),(ev) +44: CMOVZ Gv,Ev (es) | CMOVZ Gv,Ev (66),(es) | CFCMOVZ Ev,Ev (es) | CFCMOVZ Ev,Ev (66),(es) | SETZ Eb (F2),(ev) +45: CMOVNZ Gv,Ev (es) | CMOVNZ Gv,Ev (66),(es) | CFCMOVNZ Ev,Ev (es) | CFCMOVNZ Ev,Ev (66),(es) | SETNZ Eb (F2),(ev) +46: CMOVBE Gv,Ev (es) | CMOVBE Gv,Ev (66),(es) | CFCMOVBE Ev,Ev (es) | CFCMOVBE Ev,Ev (66),(es) | SETBE Eb (F2),(ev) +47: CMOVNBE Gv,Ev (es) | CMOVNBE Gv,Ev (66),(es) | CFCMOVNBE Ev,Ev (es) | CFCMOVNBE Ev,Ev (66),(es) | SETNBE Eb (F2),(ev) +48: CMOVS Gv,Ev (es) | CMOVS Gv,Ev (66),(es) | CFCMOVS Ev,Ev (es) | CFCMOVS Ev,Ev (66),(es) | SETS Eb (F2),(ev) +49: CMOVNS Gv,Ev (es) | CMOVNS Gv,Ev (66),(es) | CFCMOVNS Ev,Ev (es) | CFCMOVNS Ev,Ev (66),(es) | SETNS Eb (F2),(ev) +4a: CMOVP Gv,Ev (es) | CMOVP Gv,Ev (66),(es) | CFCMOVP Ev,Ev (es) | CFCMOVP Ev,Ev (66),(es) | SETP Eb (F2),(ev) +4b: CMOVNP Gv,Ev (es) | CMOVNP Gv,Ev (66),(es) | CFCMOVNP Ev,Ev (es) | CFCMOVNP Ev,Ev (66),(es) | SETNP Eb (F2),(ev) +4c: CMOVL Gv,Ev (es) | CMOVL Gv,Ev (66),(es) | CFCMOVL Ev,Ev (es) | CFCMOVL Ev,Ev (66),(es) | SETL Eb (F2),(ev) +4d: CMOVNL Gv,Ev (es) | CMOVNL Gv,Ev (66),(es) | CFCMOVNL Ev,Ev (es) | CFCMOVNL Ev,Ev (66),(es) | SETNL Eb (F2),(ev) +4e: CMOVLE Gv,Ev (es) | CMOVLE Gv,Ev (66),(es) | CFCMOVLE Ev,Ev (es) | CFCMOVLE Ev,Ev (66),(es) | SETLE Eb (F2),(ev) +4f: CMOVNLE Gv,Ev (es) | CMOVNLE Gv,Ev (66),(es) | CFCMOVNLE Ev,Ev (es) | CFCMOVNLE Ev,Ev (66),(es) | SETNLE Eb (F2),(ev) +60: MOVBE Gv,Ev (es) | MOVBE Gv,Ev (66),(es) +61: MOVBE Ev,Gv (es) | MOVBE Ev,Gv (66),(es) +65: WRUSSD Md,Gd (66),(ev) | WRUSSQ Mq,Gq (66),(ev) +66: ADCX Gy,Ey (66),(ev) | ADOX Gy,Ey (F3),(ev) | WRSSD Md,Gd (ev) | WRSSQ Mq,Gq (66),(ev) +69: IMUL Gv,Ev,Iz (es) | IMUL Gv,Ev,Iz (66),(es) +6b: IMUL Gv,Ev,Ib (es) | IMUL Gv,Ev,Ib (66),(es) +80: Grp1 Eb,Ib (1A),(ev) +81: Grp1 Ev,Iz (1A),(es) +83: Grp1 Ev,Ib (1A),(es) +# CTESTSCC instructions are: CTESTB, CTESTBE, CTESTF, CTESTL, CTESTLE, CTESTNB, CTESTNBE, CTESTNL, +# CTESTNLE, CTESTNO, CTESTNS, CTESTNZ, CTESTO, CTESTS, CTESTT, CTESTZ +84: CTESTSCC Eb,Gb (ev) +85: CTESTSCC Ev,Gv (es) | CTESTSCC Ev,Gv (66),(es) +88: POPCNT Gv,Ev (es) | POPCNT Gv,Ev (66),(es) +8f: POP2 Bq,Rq (000),(11B),(ev) +a5: SHLD Ev,Gv,CL (es) | SHLD Ev,Gv,CL (66),(es) +ad: SHRD Ev,Gv,CL (es) | SHRD Ev,Gv,CL (66),(es) +af: IMUL Gv,Ev (es) | IMUL Gv,Ev (66),(es) +c0: Grp2 Eb,Ib (1A),(ev) +c1: Grp2 Ev,Ib (1A),(es) +d0: Grp2 Eb,1 (1A),(ev) +d1: Grp2 Ev,1 (1A),(es) +d2: Grp2 Eb,CL (1A),(ev) +d3: Grp2 Ev,CL (1A),(es) +f0: CRC32 Gy,Eb (es) | INVEPT Gq,Mdq (F3),(ev) +f1: CRC32 Gy,Ey (es) | CRC32 Gy,Ey (66),(es) | INVVPID Gy,Mdq (F3),(ev) +f2: INVPCID Gy,Mdq (F3),(ev) +f4: TZCNT Gv,Ev (es) | TZCNT Gv,Ev (66),(es) +f5: LZCNT Gv,Ev (es) | LZCNT Gv,Ev (66),(es) +f6: Grp3_1 Eb (1A),(ev) +f7: Grp3_2 Ev (1A),(es) +f8: MOVDIR64B Gv,Mdqq (66),(ev) | ENQCMD Gv,Mdqq (F2),(ev) | ENQCMDS Gv,Mdqq (F3),(ev) | URDMSR Rq,Gq (F2),(11B),(ev) | UWRMSR Gq,Rq (F3),(11B),(ev) +f9: MOVDIRI My,Gy (ev) +fe: Grp4 (1A),(ev) +ff: Grp5 (1A),(es) | PUSH2 Bq,Rq (110),(11B),(ev) +EndTable + +Table: EVEX map 5 +Referrer: +AVXcode: 5 +10: vmovsh Vx,Hx,Wx (F3),(ev) | vmovsh Vx,Wx (F3),(ev) +11: vmovsh Wx,Hx,Vx (F3),(ev) | vmovsh Wx,Vx (F3),(ev) +1d: vcvtps2phx Vx,Wx (66),(ev) | vcvtss2sh Vx,Hx,Wx (ev) +2a: vcvtsi2sh Vx,Hx,Wx (F3),(ev) +2c: vcvttsh2si Vx,Wx (F3),(ev) +2d: vcvtsh2si Vx,Wx (F3),(ev) +2e: vucomish Vx,Wx (ev) +2f: vcomish Vx,Wx (ev) +51: vsqrtph Vx,Wx (ev) | vsqrtsh Vx,Hx,Wx (F3),(ev) +58: vaddph Vx,Hx,Wx (ev) | vaddsh Vx,Hx,Wx (F3),(ev) +59: vmulph Vx,Hx,Wx (ev) | vmulsh Vx,Hx,Wx (F3),(ev) +5a: vcvtpd2ph Vx,Wx (66),(ev) | vcvtph2pd Vx,Wx (ev) | vcvtsd2sh Vx,Hx,Wx (F2),(ev) | vcvtsh2sd Vx,Hx,Wx (F3),(ev) +5b: vcvtdq2ph Vx,Wx (ev) | vcvtph2dq Vx,Wx (66),(ev) | vcvtqq2ph Vx,Wx (ev) | vcvttph2dq Vx,Wx (F3),(ev) +5c: vsubph Vx,Hx,Wx (ev) | vsubsh Vx,Hx,Wx (F3),(ev) +5d: vminph Vx,Hx,Wx (ev) | vminsh Vx,Hx,Wx (F3),(ev) +5e: vdivph Vx,Hx,Wx (ev) | vdivsh Vx,Hx,Wx (F3),(ev) +5f: vmaxph Vx,Hx,Wx (ev) | vmaxsh Vx,Hx,Wx (F3),(ev) +6e: vmovw Vx,Wx (66),(ev) +78: vcvttph2udq Vx,Wx (ev) | vcvttph2uqq Vx,Wx (66),(ev) | vcvttsh2usi Vx,Wx (F3),(ev) +79: vcvtph2udq Vx,Wx (ev) | vcvtph2uqq Vx,Wx (66),(ev) | vcvtsh2usi Vx,Wx (F3),(ev) +7a: vcvttph2qq Vx,Wx (66),(ev) | vcvtudq2ph Vx,Wx (F2),(ev) | vcvtuqq2ph Vx,Wx (F2),(ev) +7b: vcvtph2qq Vx,Wx (66),(ev) | vcvtusi2sh Vx,Hx,Wx (F3),(ev) +7c: vcvttph2uw Vx,Wx (ev) | vcvttph2w Vx,Wx (66),(ev) +7d: vcvtph2uw Vx,Wx (ev) | vcvtph2w Vx,Wx (66),(ev) | vcvtuw2ph Vx,Wx (F2),(ev) | vcvtw2ph Vx,Wx (F3),(ev) +7e: vmovw Wx,Vx (66),(ev) +EndTable + +Table: EVEX map 6 +Referrer: +AVXcode: 6 +13: vcvtph2psx Vx,Wx (66),(ev) | vcvtsh2ss Vx,Hx,Wx (ev) +2c: vscalefph Vx,Hx,Wx (66),(ev) +2d: vscalefsh Vx,Hx,Wx (66),(ev) +42: vgetexpph Vx,Wx (66),(ev) +43: vgetexpsh Vx,Hx,Wx (66),(ev) +4c: vrcpph Vx,Wx (66),(ev) +4d: vrcpsh Vx,Hx,Wx (66),(ev) +4e: vrsqrtph Vx,Wx (66),(ev) +4f: vrsqrtsh Vx,Hx,Wx (66),(ev) +56: vfcmaddcph Vx,Hx,Wx (F2),(ev) | vfmaddcph Vx,Hx,Wx (F3),(ev) +57: vfcmaddcsh Vx,Hx,Wx (F2),(ev) | vfmaddcsh Vx,Hx,Wx (F3),(ev) +96: vfmaddsub132ph Vx,Hx,Wx (66),(ev) +97: vfmsubadd132ph Vx,Hx,Wx (66),(ev) +98: vfmadd132ph Vx,Hx,Wx (66),(ev) +99: vfmadd132sh Vx,Hx,Wx (66),(ev) +9a: vfmsub132ph Vx,Hx,Wx (66),(ev) +9b: vfmsub132sh Vx,Hx,Wx (66),(ev) +9c: vfnmadd132ph Vx,Hx,Wx (66),(ev) +9d: vfnmadd132sh Vx,Hx,Wx (66),(ev) +9e: vfnmsub132ph Vx,Hx,Wx (66),(ev) +9f: vfnmsub132sh Vx,Hx,Wx (66),(ev) +a6: vfmaddsub213ph Vx,Hx,Wx (66),(ev) +a7: vfmsubadd213ph Vx,Hx,Wx (66),(ev) +a8: vfmadd213ph Vx,Hx,Wx (66),(ev) +a9: vfmadd213sh Vx,Hx,Wx (66),(ev) +aa: vfmsub213ph Vx,Hx,Wx (66),(ev) +ab: vfmsub213sh Vx,Hx,Wx (66),(ev) +ac: vfnmadd213ph Vx,Hx,Wx (66),(ev) +ad: vfnmadd213sh Vx,Hx,Wx (66),(ev) +ae: vfnmsub213ph Vx,Hx,Wx (66),(ev) +af: vfnmsub213sh Vx,Hx,Wx (66),(ev) +b6: vfmaddsub231ph Vx,Hx,Wx (66),(ev) +b7: vfmsubadd231ph Vx,Hx,Wx (66),(ev) +b8: vfmadd231ph Vx,Hx,Wx (66),(ev) +b9: vfmadd231sh Vx,Hx,Wx (66),(ev) +ba: vfmsub231ph Vx,Hx,Wx (66),(ev) +bb: vfmsub231sh Vx,Hx,Wx (66),(ev) +bc: vfnmadd231ph Vx,Hx,Wx (66),(ev) +bd: vfnmadd231sh Vx,Hx,Wx (66),(ev) +be: vfnmsub231ph Vx,Hx,Wx (66),(ev) +bf: vfnmsub231sh Vx,Hx,Wx (66),(ev) +d6: vfcmulcph Vx,Hx,Wx (F2),(ev) | vfmulcph Vx,Hx,Wx (F3),(ev) +d7: vfcmulcsh Vx,Hx,Wx (F2),(ev) | vfmulcsh Vx,Hx,Wx (F3),(ev) +EndTable + +Table: VEX map 7 +Referrer: +AVXcode: 7 +f8: URDMSR Rq,Id (F2),(v1),(11B) | UWRMSR Id,Rq (F3),(v1),(11B) EndTable GrpTable: Grp1 @@ -962,15 +1178,16 @@ GrpTable: Grp6 3: LTR Ew 4: VERR Ew 5: VERW Ew +6: LKGS Ew (F2) EndTable GrpTable: Grp7 -0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) | PCONFIG (101),(11B) | ENCLV (000),(11B) -1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) | ENCLS (111),(11B) +0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) | PCONFIG (101),(11B) | ENCLV (000),(11B) | WRMSRNS (110),(11B) | RDMSRLIST (F2),(110),(11B) | WRMSRLIST (F3),(110),(11B) | PBNDKB (111),(11B) +1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) | ENCLS (111),(11B) | ERETU (F3),(010),(11B) | ERETS (F2),(010),(11B) 2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B) 3: LIDT Ms 4: SMSW Mw/Rv -5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B) +5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B) | CLUI (F3),(110),(11B) | SERIALIZE (000),(11B) | STUI (F3),(111),(11B) | TESTUI (F3)(101)(11B) | UIRET (F3),(100),(11B) | XRESLDTRK (F2),(000),(11B) | XSUSLDTRK (F2),(001),(11B) 6: LMSW Ew 7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) EndTable @@ -987,7 +1204,7 @@ GrpTable: Grp9 3: xrstors 4: xsavec 5: xsaves -6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) +6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) | SENDUIPI Gq (F3) 7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B) EndTable @@ -1051,6 +1268,8 @@ GrpTable: Grp16 1: prefetch T0 2: prefetch T1 3: prefetch T2 +6: prefetch IT1 +7: prefetch IT0 EndTable GrpTable: Grp17 |