summary refs log tree commit diff stats
path: root/lib
diff options
context:
space:
mode:
author miod <miod@openbsd.org> 2016-11-04 17:30:30 +0000
committer miod <miod@openbsd.org> 2016-11-04 17:30:30 +0000
commit e60c46c4c9a5c27f2a64a9eeda73a83a36b3f7a8 (patch)
tree 4a1d8de71c6b778094c6bb11703e3e5b3ad28f83 /lib
parent Be verbose when PATCH_PATH is not set (that is temporary until we agree on (diff)
download wireguard-openbsd-e60c46c4c9a5c27f2a64a9eeda73a83a36b3f7a8.tar.xz
download wireguard-openbsd-e60c46c4c9a5c27f2a64a9eeda73a83a36b3f7a8.zip
Replace all uses of magic numbers when operating on OPENSSL_ia32cap_P[] by
meaningful constants in a private header file, so that reviewers can actually get a chance to figure out what the code is attempting to do without knowing all cpuid bits. While there, turn it from an array of two 32-bit ints into a properly aligned 64-bit int. Use of OPENSSL_ia32cap_P is now restricted to the assembler parts. C code will now always use OPENSSL_cpu_caps() and check for the proper bits in the whole 64-bit word it returns. i386 tests and ok jsing@
Diffstat (limited to 'lib')
-rw-r--r-- lib/libcrypto/aes/asm/aes-586.pl 10
-rwxr-xr-x lib/libcrypto/aes/asm/aes-x86_64.pl 6
-rw-r--r-- lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl 7
-rw-r--r-- lib/libcrypto/bn/asm/bn-586.pl 12
-rw-r--r-- lib/libcrypto/bn/asm/x86-gf2m.pl 6
-rwxr-xr-x lib/libcrypto/bn/asm/x86-mont.pl 2
-rw-r--r-- lib/libcrypto/bn/asm/x86_64-gf2m.pl 5
-rw-r--r-- lib/libcrypto/cryptlib.c 27
-rw-r--r-- lib/libcrypto/cryptlib.h 3
-rw-r--r-- lib/libcrypto/engine/eng_aesni.c 16
-rw-r--r-- lib/libcrypto/evp/e_aes.c 8
-rw-r--r-- lib/libcrypto/evp/e_aes_cbc_hmac_sha1.c 11
-rw-r--r-- lib/libcrypto/evp/e_rc4_hmac_md5.c 8
-rw-r--r-- lib/libcrypto/modes/gcm128.c 19
-rwxr-xr-x lib/libcrypto/perlasm/x86_64-xlate.pl 4
-rw-r--r-- lib/libcrypto/perlasm/x86asm.pl 1
-rw-r--r-- lib/libcrypto/perlasm/x86gas.pl 6
-rw-r--r-- lib/libcrypto/rc4/asm/rc4-586.pl 9
-rwxr-xr-x lib/libcrypto/rc4/asm/rc4-x86_64.pl 9
-rw-r--r-- lib/libcrypto/sha/asm/sha1-586.pl 10
-rwxr-xr-x lib/libcrypto/sha/asm/sha1-x86_64.pl 9
-rw-r--r-- lib/libcrypto/sha/asm/sha512-586.pl 2
-rw-r--r-- lib/libcrypto/whrlpool/wp_block.c 19
-rw-r--r-- lib/libcrypto/x86_64cpuid.pl 36
-rw-r--r-- lib/libcrypto/x86_arch.h 90
-rw-r--r-- lib/libcrypto/x86cpuid.pl 56
26 files changed, 245 insertions, 146 deletions
diff --git a/lib/libcrypto/aes/asm/aes-586.pl b/lib/libcrypto/aes/asm/aes-586.pl
index aab40e6f1cf..3ba8a26eaa3 100644
--- a/lib/libcrypto/aes/asm/aes-586.pl
+++ b/lib/libcrypto/aes/asm/aes-586.pl
@@ -1187,7 +1187,7 @@ sub enclast()
&lea ($tbl,&DWP(2048+128,$tbl,$s1));
if (!$x86only) {
- &bt (&DWP(0,$s0),25); # check for SSE bit
+ &bt (&DWP(0,$s0),"\$IA32CAP_BIT0_SSE"); # check for SSE bit
&jnc (&label("x86"));
&movq ("mm0",&QWP(0,$acc));
@@ -1976,7 +1976,7 @@ sub declast()
&lea ($tbl,&DWP(2048+128,$tbl,$s1));
if (!$x86only) {
- &bt (&DWP(0,$s0),25); # check for SSE bit
+ &bt (&DWP(0,$s0),"\$IA32CAP_BIT0_SSE"); # check for SSE bit
&jnc (&label("x86"));
&movq ("mm0",&QWP(0,$acc));
@@ -2054,7 +2054,7 @@ my $mark=&DWP(76+240,"esp"); # copy of aes_key->rounds
&test ($s2,15);
&jnz (&label("slow_way"));
if (!$x86only) {
- &bt (&DWP(0,$s0),28); # check for hyper-threading bit
+ &bt (&DWP(0,$s0),"\$IA32CAP_BIT0_HT"); # check for hyper-threading bit
&jc (&label("slow_way"));
}
# pre-allocate aligned stack frame...
@@ -2364,7 +2364,7 @@ my $mark=&DWP(76+240,"esp"); # copy of aes_key->rounds
&jb (&label("slow_enc_tail"));
if (!$x86only) {
- &bt ($_tmp,25); # check for SSE bit
+ &bt ($_tmp,"\$IA32CAP_BIT0_SSE"); # check for SSE bit
&jnc (&label("slow_enc_x86"));
&movq ("mm0",&QWP(0,$key)); # load iv
@@ -2479,7 +2479,7 @@ my $mark=&DWP(76+240,"esp"); # copy of aes_key->rounds
#--------------------------- SLOW DECRYPT ---------------------------#
&set_label("slow_decrypt",16);
if (!$x86only) {
- &bt ($_tmp,25); # check for SSE bit
+ &bt ($_tmp,"\$IA32CAP_BIT0_SSE"); # check for SSE bit
&jnc (&label("slow_dec_loop_x86"));
&set_label("slow_dec_loop_sse",4);
diff --git a/lib/libcrypto/aes/asm/aes-x86_64.pl b/lib/libcrypto/aes/asm/aes-x86_64.pl
index f75e90ba87d..c37fd55648b 100755
--- a/lib/libcrypto/aes/asm/aes-x86_64.pl
+++ b/lib/libcrypto/aes/asm/aes-x86_64.pl
@@ -1655,6 +1655,7 @@ $code.=<<___;
.type AES_cbc_encrypt,\@function,6
.align 16
.extern OPENSSL_ia32cap_P
+.hidden OPENSSL_ia32cap_P
.globl asm_AES_cbc_encrypt
.hidden asm_AES_cbc_encrypt
asm_AES_cbc_encrypt:
@@ -1684,7 +1685,7 @@ AES_cbc_encrypt:
jb .Lcbc_slow_prologue
test \$15,%rdx
jnz .Lcbc_slow_prologue
- bt \$28,%r10d
+ bt \$IA32CAP_BIT0_HT,%r10d
jc .Lcbc_slow_prologue
# allocate aligned stack frame...
@@ -1944,7 +1945,7 @@ AES_cbc_encrypt:
lea ($key,%rax),%rax
mov %rax,$keyend
- # pick Te4 copy which can't "overlap" with stack frame or key scdedule
+ # pick Te4 copy which can't "overlap" with stack frame or key schedule
lea 2048($sbox),$sbox
lea 768-8(%rsp),%rax
sub $sbox,%rax
@@ -2814,6 +2815,7 @@ ___
$code =~ s/\`([^\`]*)\`/eval($1)/gem;
+print "#include \"x86_arch.h\"\n";
print $code;
close STDOUT;
diff --git a/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl b/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl
index 39b504cbe58..bc6c8f3fc08 100644
--- a/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl
+++ b/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl
@@ -83,6 +83,7 @@ open OUT,"| \"$^X\" $xlate $flavour $output";
$code.=<<___;
.text
.extern OPENSSL_ia32cap_P
+.hidden OPENSSL_ia32cap_P
.globl aesni_cbc_sha1_enc
.type aesni_cbc_sha1_enc,\@abi-omnipotent
@@ -93,10 +94,10 @@ aesni_cbc_sha1_enc:
mov OPENSSL_ia32cap_P+4(%rip),%r11d
___
$code.=<<___ if ($avx);
- and \$`1<<28`,%r11d # mask AVX bit
- and \$`1<<30`,%r10d # mask "Intel CPU" bit
+ and \$IA32CAP_MASK1_AVX,%r11d # mask AVX bit
+ and \$IA32CAP_MASK0_INTEL,%r10d # mask "Intel CPU" bit
or %r11d,%r10d
- cmp \$`1<<28|1<<30`,%r10d
+ cmp \$(IA32CAP_MASK1_AVX|IA32CAP_MASK0_INTEL),%r10d
je aesni_cbc_sha1_enc_avx
___
$code.=<<___;
diff --git a/lib/libcrypto/bn/asm/bn-586.pl b/lib/libcrypto/bn/asm/bn-586.pl
index 332ef3e91d6..c4e2baa6c5a 100644
--- a/lib/libcrypto/bn/asm/bn-586.pl
+++ b/lib/libcrypto/bn/asm/bn-586.pl
@@ -25,7 +25,7 @@ sub bn_mul_add_words
{
local($name)=@_;
- &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");
+ &function_begin_B($name,"");
$r="eax";
$a="edx";
@@ -33,7 +33,7 @@ sub bn_mul_add_words
if ($sse2) {
&picmeup("eax","OPENSSL_ia32cap_P");
- &bt(&DWP(0,"eax"),26);
+ &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2");
&jnc(&label("maw_non_sse2"));
&mov($r,&wparam(0));
@@ -211,7 +211,7 @@ sub bn_mul_words
{
local($name)=@_;
- &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");
+ &function_begin_B($name,"");
$r="eax";
$a="edx";
@@ -219,7 +219,7 @@ sub bn_mul_words
if ($sse2) {
&picmeup("eax","OPENSSL_ia32cap_P");
- &bt(&DWP(0,"eax"),26);
+ &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2");
&jnc(&label("mw_non_sse2"));
&mov($r,&wparam(0));
@@ -322,7 +322,7 @@ sub bn_sqr_words
{
local($name)=@_;
- &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");
+ &function_begin_B($name,"");
$r="eax";
$a="edx";
@@ -330,7 +330,7 @@ sub bn_sqr_words
if ($sse2) {
&picmeup("eax","OPENSSL_ia32cap_P");
- &bt(&DWP(0,"eax"),26);
+ &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2");
&jnc(&label("sqr_non_sse2"));
&mov($r,&wparam(0));
diff --git a/lib/libcrypto/bn/asm/x86-gf2m.pl b/lib/libcrypto/bn/asm/x86-gf2m.pl
index 808a1e59691..97d91362602 100644
--- a/lib/libcrypto/bn/asm/x86-gf2m.pl
+++ b/lib/libcrypto/bn/asm/x86-gf2m.pl
@@ -203,12 +203,12 @@ if (!$x86only) {
&picmeup("edx","OPENSSL_ia32cap_P");
&mov ("eax",&DWP(0,"edx"));
&mov ("edx",&DWP(4,"edx"));
- &test ("eax",1<<23); # check MMX bit
+ &test ("eax","\$IA32CAP_MASK0_MMX"); # check MMX bit
&jz (&label("ialu"));
if ($sse2) {
- &test ("eax",1<<24); # check FXSR bit
+ &test ("eax","\$IA32CAP_MASK0_FXSR"); # check FXSR bit
&jz (&label("mmx"));
- &test ("edx",1<<1); # check PCLMULQDQ bit
+ &test ("edx","\$IA32CAP_MASK1_PCLMUL"); # check PCLMULQDQ bit
&jz (&label("mmx"));
&movups ("xmm0",&QWP(8,"esp"));
diff --git a/lib/libcrypto/bn/asm/x86-mont.pl b/lib/libcrypto/bn/asm/x86-mont.pl
index e8f6b050842..a0bdd5787e5 100755
--- a/lib/libcrypto/bn/asm/x86-mont.pl
+++ b/lib/libcrypto/bn/asm/x86-mont.pl
@@ -114,7 +114,7 @@ $temp="mm6";
$mask="mm7";
&picmeup("eax","OPENSSL_ia32cap_P");
- &bt (&DWP(0,"eax"),26);
+ &bt (&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2");
&jnc (&label("non_sse2"));
&mov ("eax",-1);
diff --git a/lib/libcrypto/bn/asm/x86_64-gf2m.pl b/lib/libcrypto/bn/asm/x86_64-gf2m.pl
index 8e45c7479b3..3ecb425dad2 100644
--- a/lib/libcrypto/bn/asm/x86_64-gf2m.pl
+++ b/lib/libcrypto/bn/asm/x86_64-gf2m.pl
@@ -163,12 +163,13 @@ ___
$code.=<<___;
.extern OPENSSL_ia32cap_P
+.hidden OPENSSL_ia32cap_P
.globl bn_GF2m_mul_2x2
.type bn_GF2m_mul_2x2,\@abi-omnipotent
.align 16
bn_GF2m_mul_2x2:
- mov OPENSSL_ia32cap_P(%rip),%rax
- bt \$33,%rax
+ mov OPENSSL_ia32cap_P+4(%rip),%eax
+ bt \$IA32CAP_BIT1_PCLMUL,%eax
jnc .Lvanilla_mul_2x2
movd $a1,%xmm0
diff --git a/lib/libcrypto/cryptlib.c b/lib/libcrypto/cryptlib.c
index fa091fbaeac..8dec9caa93e 100644
--- a/lib/libcrypto/cryptlib.c
+++ b/lib/libcrypto/cryptlib.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: cryptlib.c,v 1.38 2016/11/04 13:56:04 miod Exp $ */
+/* $OpenBSD: cryptlib.c,v 1.39 2016/11/04 17:30:30 miod Exp $ */
/* ====================================================================
* Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved.
*
@@ -627,47 +627,30 @@ CRYPTO_get_lock_name(int type)
defined(__INTEL__) || \
defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)
-unsigned int OPENSSL_ia32cap_P[2];
+uint64_t OPENSSL_ia32cap_P;
uint64_t
OPENSSL_cpu_caps(void)
{
- return *(uint64_t *)OPENSSL_ia32cap_P;
+ return OPENSSL_ia32cap_P;
}
#if defined(OPENSSL_CPUID_OBJ) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_CPUID_SETUP
-typedef unsigned long long IA32CAP;
void
OPENSSL_cpuid_setup(void)
{
static int trigger = 0;
- IA32CAP OPENSSL_ia32_cpuid(void);
- IA32CAP vec;
+ uint64_t OPENSSL_ia32_cpuid(void);
if (trigger)
return;
trigger = 1;
-
- vec = OPENSSL_ia32_cpuid();
-
- /*
- * |(1<<10) sets a reserved bit to signal that variable
- * was initialized already... This is to avoid interference
- * with cpuid snippets in ELF .init segment.
- */
- OPENSSL_ia32cap_P[0] = (unsigned int)vec | (1 << 10);
- OPENSSL_ia32cap_P[1] = (unsigned int)(vec >> 32);
+ OPENSSL_ia32cap_P = OPENSSL_ia32_cpuid();
}
#endif
#else
-unsigned long *
-OPENSSL_ia32cap_loc(void)
-{
- return NULL;
-}
-
uint64_t
OPENSSL_cpu_caps(void)
{
diff --git a/lib/libcrypto/cryptlib.h b/lib/libcrypto/cryptlib.h
index ad679dfa8d4..d44738bf3c0 100644
--- a/lib/libcrypto/cryptlib.h
+++ b/lib/libcrypto/cryptlib.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: cryptlib.h,v 1.24 2014/07/11 08:44:47 jsing Exp $ */
+/* $OpenBSD: cryptlib.h,v 1.25 2016/11/04 17:30:30 miod Exp $ */
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
@@ -73,7 +73,6 @@ extern "C" {
#define X509_CERT_FILE_EVP "SSL_CERT_FILE"
void OPENSSL_cpuid_setup(void);
-extern unsigned int OPENSSL_ia32cap_P[];
#ifdef __cplusplus
}
diff --git a/lib/libcrypto/engine/eng_aesni.c b/lib/libcrypto/engine/eng_aesni.c
index 5f9a36236ac..92794f60868 100644
--- a/lib/libcrypto/engine/eng_aesni.c
+++ b/lib/libcrypto/engine/eng_aesni.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: eng_aesni.c,v 1.8 2015/02/10 09:46:30 miod Exp $ */
+/* $OpenBSD: eng_aesni.c,v 1.9 2016/11/04 17:30:30 miod Exp $ */
/*
* Support for Intel AES-NI intruction set
* Author: Huang Ying <ying.huang@intel.com>
@@ -93,10 +93,11 @@
defined(_M_AMD64) || defined(_M_X64) || \
defined(OPENSSL_IA32_SSE2)) && !defined(OPENSSL_NO_ASM) && !defined(__i386__)
#define COMPILE_HW_AESNI
+#include "x86_arch.h"
#endif
-static ENGINE *ENGINE_aesni (void);
+static ENGINE *ENGINE_aesni(void);
-void ENGINE_load_aesni (void)
+void ENGINE_load_aesni(void)
{
/* On non-x86 CPUs it just returns. */
#ifdef COMPILE_HW_AESNI
@@ -302,20 +303,13 @@ aesni_ofb128_encrypt(const unsigned char *in, unsigned char *out,
}
/* ===== Engine "management" functions ===== */
-typedef unsigned long long IA32CAP;
-
/* Prepare the ENGINE structure for registration */
static int
aesni_bind_helper(ENGINE *e)
{
int engage;
- if (sizeof(OPENSSL_ia32cap_P) > 4) {
- engage = ((IA32CAP)OPENSSL_ia32cap_P >> 57) & 1;
- } else {
- IA32CAP OPENSSL_ia32_cpuid(void);
- engage = (OPENSSL_ia32_cpuid() >> 57) & 1;
- }
+ engage = (OPENSSL_cpu_caps() & CPUCAP_MASK_AESNI) != 0;
/* Register everything or return with an error */
if (!ENGINE_set_id(e, aesni_id) ||
diff --git a/lib/libcrypto/evp/e_aes.c b/lib/libcrypto/evp/e_aes.c
index 25199dca366..b20543a90ca 100644
--- a/lib/libcrypto/evp/e_aes.c
+++ b/lib/libcrypto/evp/e_aes.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: e_aes.c,v 1.30 2016/11/04 13:56:05 miod Exp $ */
+/* $OpenBSD: e_aes.c,v 1.31 2016/11/04 17:30:30 miod Exp $ */
/* ====================================================================
* Copyright (c) 2001-2011 The OpenSSL Project. All rights reserved.
*
@@ -150,10 +150,10 @@ void AES_xts_decrypt(const char *inp, char *out, size_t len,
defined(_M_AMD64) || defined(_M_X64) || \
defined(__INTEL__) )
-extern unsigned int OPENSSL_ia32cap_P[];
+#include "x86_arch.h"
#ifdef VPAES_ASM
-#define VPAES_CAPABLE (OPENSSL_ia32cap_P[1]&(1<<(41-32)))
+#define VPAES_CAPABLE (OPENSSL_cpu_caps() & CPUCAP_MASK_SSSE3)
#endif
#ifdef BSAES_ASM
#define BSAES_CAPABLE VPAES_CAPABLE
@@ -161,7 +161,7 @@ extern unsigned int OPENSSL_ia32cap_P[];
/*
* AES-NI section
*/
-#define AESNI_CAPABLE (OPENSSL_ia32cap_P[1]&(1<<(57-32)))
+#define AESNI_CAPABLE (OPENSSL_cpu_caps() & CPUCAP_MASK_AESNI)
int aesni_set_encrypt_key(const unsigned char *userKey, int bits,
AES_KEY *key);
diff --git a/lib/libcrypto/evp/e_aes_cbc_hmac_sha1.c b/lib/libcrypto/evp/e_aes_cbc_hmac_sha1.c
index 8574823aed3..3f82cf59670 100644
--- a/lib/libcrypto/evp/e_aes_cbc_hmac_sha1.c
+++ b/lib/libcrypto/evp/e_aes_cbc_hmac_sha1.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: e_aes_cbc_hmac_sha1.c,v 1.12 2016/05/04 15:01:33 tedu Exp $ */
+/* $OpenBSD: e_aes_cbc_hmac_sha1.c,v 1.13 2016/11/04 17:30:30 miod Exp $ */
/* ====================================================================
* Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved.
*
@@ -87,13 +87,12 @@ typedef struct {
defined(_M_AMD64) || defined(_M_X64) || \
defined(__INTEL__) )
+#include "x86_arch.h"
+
#if defined(__GNUC__) && __GNUC__>=2
# define BSWAP(x) ({ unsigned int r=(x); asm ("bswapl %0":"=r"(r):"0"(r)); r; })
#endif
-extern unsigned int OPENSSL_ia32cap_P[2];
-#define AESNI_CAPABLE (1<<(57-32))
-
int aesni_set_encrypt_key(const unsigned char *userKey, int bits, AES_KEY *key);
int aesni_set_decrypt_key(const unsigned char *userKey, int bits, AES_KEY *key);
@@ -578,14 +577,14 @@ static EVP_CIPHER aesni_256_cbc_hmac_sha1_cipher = {
const EVP_CIPHER *
EVP_aes_128_cbc_hmac_sha1(void)
{
- return OPENSSL_ia32cap_P[1] & AESNI_CAPABLE ?
+ return (OPENSSL_cpu_caps() & CPUCAP_MASK_AESNI) ?
&aesni_128_cbc_hmac_sha1_cipher : NULL;
}
const EVP_CIPHER *
EVP_aes_256_cbc_hmac_sha1(void)
{
- return OPENSSL_ia32cap_P[1] & AESNI_CAPABLE ?
+ return (OPENSSL_cpu_caps() & CPUCAP_MASK_AESNI) ?
&aesni_256_cbc_hmac_sha1_cipher : NULL;
}
#else
diff --git a/lib/libcrypto/evp/e_rc4_hmac_md5.c b/lib/libcrypto/evp/e_rc4_hmac_md5.c
index 1f085af4039..39527cafe69 100644
--- a/lib/libcrypto/evp/e_rc4_hmac_md5.c
+++ b/lib/libcrypto/evp/e_rc4_hmac_md5.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: e_rc4_hmac_md5.c,v 1.5 2014/08/11 13:29:43 bcook Exp $ */
+/* $OpenBSD: e_rc4_hmac_md5.c,v 1.6 2016/11/04 17:30:30 miod Exp $ */
/* ====================================================================
* Copyright (c) 2011 The OpenSSL Project. All rights reserved.
*
@@ -105,6 +105,7 @@ rc4_hmac_md5_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *inkey,
defined(__INTEL__) ) && \
!(defined(__APPLE__) && defined(__MACH__))
#define STITCHED_CALL
+#include "x86_arch.h"
#endif
#if !defined(STITCHED_CALL)
@@ -122,7 +123,6 @@ rc4_hmac_md5_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
md5_off = MD5_CBLOCK - key->md.num,
blocks;
unsigned int l;
- extern unsigned int OPENSSL_ia32cap_P[];
#endif
size_t plen = key->payload_length;
@@ -139,7 +139,7 @@ rc4_hmac_md5_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
if (plen > md5_off &&
(blocks = (plen - md5_off) / MD5_CBLOCK) &&
- (OPENSSL_ia32cap_P[0]&(1 << 20)) == 0) {
+ (OPENSSL_cpu_caps() & CPUCAP_MASK_INTELP4) == 0) {
MD5_Update(&key->md, in, md5_off);
RC4(&key->ks, rc4_off, in, out);
@@ -187,7 +187,7 @@ rc4_hmac_md5_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
rc4_off += MD5_CBLOCK;
if (len > rc4_off && (blocks = (len - rc4_off) / MD5_CBLOCK) &&
- (OPENSSL_ia32cap_P[0] & (1 << 20)) == 0) {
+ (OPENSSL_cpu_caps() & CPUCAP_MASK_INTELP4) == 0) {
RC4(&key->ks, rc4_off, in, out);
MD5_Update(&key->md, out, md5_off);
diff --git a/lib/libcrypto/modes/gcm128.c b/lib/libcrypto/modes/gcm128.c
index 6f8a8dd7f4f..95ee755f838 100644
--- a/lib/libcrypto/modes/gcm128.c
+++ b/lib/libcrypto/modes/gcm128.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: gcm128.c,v 1.14 2016/11/04 13:56:05 miod Exp $ */
+/* $OpenBSD: gcm128.c,v 1.15 2016/11/04 17:30:30 miod Exp $ */
/* ====================================================================
* Copyright (c) 2010 The OpenSSL Project. All rights reserved.
*
@@ -637,13 +637,19 @@ static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
#endif
+#if defined(GHASH_ASM) && \
+ (defined(__i386) || defined(__i386__) || \
+ defined(__x86_64) || defined(__x86_64__) || \
+ defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
+#include "x86_arch.h"
+#endif
+
#if TABLE_BITS==4 && defined(GHASH_ASM)
# if (defined(__i386) || defined(__i386__) || \
defined(__x86_64) || defined(__x86_64__) || \
defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
# define GHASH_ASM_X86_OR_64
# define GCM_FUNCREF_4BIT
-extern unsigned int OPENSSL_ia32cap_P[2];
void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
@@ -705,8 +711,9 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
#elif TABLE_BITS==4
# if defined(GHASH_ASM_X86_OR_64)
# if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
- if (OPENSSL_ia32cap_P[0]&(1<<24) && /* check FXSR bit */
- OPENSSL_ia32cap_P[1]&(1<<1) ) { /* check PCLMULQDQ bit */
+ /* check FXSR and PCLMULQDQ bits */
+ if ((OPENSSL_cpu_caps() & (CPUCAP_MASK_FXSR | CPUCAP_MASK_PCLMUL)) ==
+ (CPUCAP_MASK_FXSR | CPUCAP_MASK_PCLMUL)) {
gcm_init_clmul(ctx->Htable,ctx->H.u);
ctx->gmult = gcm_gmult_clmul;
ctx->ghash = gcm_ghash_clmul;
@@ -716,9 +723,9 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
gcm_init_4bit(ctx->Htable,ctx->H.u);
# if defined(GHASH_ASM_X86) /* x86 only */
# if defined(OPENSSL_IA32_SSE2)
- if (OPENSSL_ia32cap_P[0]&(1<<25)) { /* check SSE bit */
+ if (OPENSSL_cpu_caps() & CPUCAP_MASK_SSE) { /* check SSE bit */
# else
- if (OPENSSL_ia32cap_P[0]&(1<<23)) { /* check MMX bit */
+ if (OPENSSL_cpu_caps() & CPUCAP_MASK_MMX) { /* check MMX bit */
# endif
ctx->gmult = gcm_gmult_4bit_mmx;
ctx->ghash = gcm_ghash_4bit_mmx;
diff --git a/lib/libcrypto/perlasm/x86_64-xlate.pl b/lib/libcrypto/perlasm/x86_64-xlate.pl
index 4bd53da33dd..a8393d27309 100755
--- a/lib/libcrypto/perlasm/x86_64-xlate.pl
+++ b/lib/libcrypto/perlasm/x86_64-xlate.pl
@@ -393,7 +393,7 @@ my %globals;
}
}
}
-{ package expr; # pick up expressioins
+{ package expr; # pick up expressions
sub re {
my $self = shift; # single instance is enough...
local *line = shift;
@@ -777,6 +777,8 @@ ___
OPTION DOTNAME
___
}
+print "#include \"x86_arch.h\"\n";
+
while($line=<>) {
chomp($line);
diff --git a/lib/libcrypto/perlasm/x86asm.pl b/lib/libcrypto/perlasm/x86asm.pl
index 5916ea4f893..e039382e009 100644
--- a/lib/libcrypto/perlasm/x86asm.pl
+++ b/lib/libcrypto/perlasm/x86asm.pl
@@ -248,6 +248,7 @@ EOF
$pic=0;
for (@ARGV) { $pic=1 if (/\-[fK]PIC/i); }
+ ::emitraw("#include \"x86_arch.h\"\n");
::emitraw("#include <machine/asm.h>\n") if $openbsd;
$filename =~ s/\.pl$//;
&file($filename);
diff --git a/lib/libcrypto/perlasm/x86gas.pl b/lib/libcrypto/perlasm/x86gas.pl
index d4baea514bd..84d24edbbdb 100644
--- a/lib/libcrypto/perlasm/x86gas.pl
+++ b/lib/libcrypto/perlasm/x86gas.pl
@@ -157,10 +157,8 @@ sub ::file_end
}
}
if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) {
- my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,8";
- if ($::macosx) { push (@out,"$tmp,2\n"); }
- elsif ($::elf) { push (@out,"$tmp,4\n"); }
- else { push (@out,"$tmp\n"); }
+ push (@out, ".extern\t${nmdecor}OPENSSL_ia32cap_P\n");
+ push (@out, ".hidden\t${nmdecor}OPENSSL_ia32cap_P\n");
}
push(@out,$initseg) if ($initseg);
}
diff --git a/lib/libcrypto/rc4/asm/rc4-586.pl b/lib/libcrypto/rc4/asm/rc4-586.pl
index 84f1a798cb8..03f0cff467c 100644
--- a/lib/libcrypto/rc4/asm/rc4-586.pl
+++ b/lib/libcrypto/rc4/asm/rc4-586.pl
@@ -189,7 +189,8 @@ if ($alt=0) {
&jz (&label("go4loop4"));
&picmeup($out,"OPENSSL_ia32cap_P");
- &bt (&DWP(0,$out),26); # check SSE2 bit [could have been MMX]
+ # check SSE2 bit [could have been MMX]
+ &bt (&DWP(0,$out),"\$IA32CAP_BIT0_SSE2");
&jnc (&label("go4loop4"));
&mov ($out,&wparam(3)) if (!$alt);
@@ -312,7 +313,7 @@ $idx="edx";
&xor ("eax","eax");
&mov (&DWP(-4,$out),$idi); # borrow key->y
- &bt (&DWP(0,$idx),20); # check for bit#20
+ &bt (&DWP(0,$idx),"\$IA32CAP_BIT0_INTELP4");
&jc (&label("c1stloop"));
&set_label("w1stloop",16);
@@ -388,9 +389,9 @@ $idx="edx";
&lea ("eax",&DWP(&label("opts")."-".&label("pic_point"),"eax"));
&picmeup("edx","OPENSSL_ia32cap_P");
&mov ("edx",&DWP(0,"edx"));
- &bt ("edx",20);
+ &bt ("edx","\$IA32CAP_BIT0_INTELP4");
&jc (&label("1xchar"));
- &bt ("edx",26);
+ &bt ("edx","\$IA32CAP_BIT0_SSE2");
&jnc (&label("ret"));
&add ("eax",25);
&ret ();
diff --git a/lib/libcrypto/rc4/asm/rc4-x86_64.pl b/lib/libcrypto/rc4/asm/rc4-x86_64.pl
index 197749dda73..2135b38ef82 100755
--- a/lib/libcrypto/rc4/asm/rc4-x86_64.pl
+++ b/lib/libcrypto/rc4/asm/rc4-x86_64.pl
@@ -122,6 +122,7 @@ $out="%rcx"; # arg4
$code=<<___;
.text
.extern OPENSSL_ia32cap_P
+.hidden OPENSSL_ia32cap_P
.globl RC4
.type RC4,\@function,4
@@ -164,7 +165,7 @@ $code.=<<___;
movl ($dat,$XX[0],4),$TX[0]#d
test \$-16,$len
jz .Lloop1
- bt \$30,%r8d # Intel CPU?
+ bt \$IA32CAP_BIT0_INTEL,%r8d # Intel CPU?
jc .Lintel
and \$7,$TX[1]
lea 1($XX[0]),$XX[1]
@@ -442,7 +443,7 @@ RC4_set_key:
xor %r11,%r11
mov OPENSSL_ia32cap_P(%rip),$idx#d
- bt \$20,$idx#d # RC4_CHAR?
+ bt \$IA32CAP_BIT0_INTELP4,$idx#d # RC4_CHAR?
jc .Lc1stloop
jmp .Lw1stloop
@@ -506,9 +507,9 @@ RC4_set_key:
RC4_options:
lea .Lopts(%rip),%rax
mov OPENSSL_ia32cap_P(%rip),%edx
- bt \$20,%edx
+ bt \$IA32CAP_BIT0_INTELP4,%edx
jc .L8xchar
- bt \$30,%edx
+ bt \$IA32CAP_BIT0_INTEL,%edx
jnc .Ldone
add \$25,%rax
ret
diff --git a/lib/libcrypto/sha/asm/sha1-586.pl b/lib/libcrypto/sha/asm/sha1-586.pl
index 6fbea34d78c..d29ed84706a 100644
--- a/lib/libcrypto/sha/asm/sha1-586.pl
+++ b/lib/libcrypto/sha/asm/sha1-586.pl
@@ -303,15 +303,15 @@ if ($xmm) {
&mov ($A,&DWP(0,$T));
&mov ($D,&DWP(4,$T));
- &test ($D,1<<9); # check SSSE3 bit
+ &test ($D,"\$IA32CAP_MASK1_SSSE3"); # check SSSE3 bit
&jz (&label("x86"));
- &test ($A,1<<24); # check FXSR bit
+ &test ($A,"\$IA32CAP_MASK0_FXSR"); # check FXSR bit
&jz (&label("x86"));
if ($ymm) {
- &and ($D,1<<28); # mask AVX bit
- &and ($A,1<<30); # mask "Intel CPU" bit
+ &and ($D,"\$IA32CAP_MASK1_AVX"); # mask AVX bit
+ &and ($A,"\$IA32CAP_MASK0_INTEL"); # mask "Intel CPU" bit
&or ($A,$D);
- &cmp ($A,1<<28|1<<30);
+ &cmp ($A,"\$(IA32CAP_MASK1_AVX | IA32CAP_MASK0_INTEL)");
&je (&label("avx_shortcut"));
}
&jmp (&label("ssse3_shortcut"));
diff --git a/lib/libcrypto/sha/asm/sha1-x86_64.pl b/lib/libcrypto/sha/asm/sha1-x86_64.pl
index f15c7ec39b2..147d21570bc 100755
--- a/lib/libcrypto/sha/asm/sha1-x86_64.pl
+++ b/lib/libcrypto/sha/asm/sha1-x86_64.pl
@@ -216,6 +216,7 @@ unshift(@xi,pop(@xi));
$code.=<<___;
.text
.extern OPENSSL_ia32cap_P
+.hidden OPENSSL_ia32cap_P
.globl sha1_block_data_order
.type sha1_block_data_order,\@function,3
@@ -223,14 +224,14 @@ $code.=<<___;
sha1_block_data_order:
mov OPENSSL_ia32cap_P+0(%rip),%r9d
mov OPENSSL_ia32cap_P+4(%rip),%r8d
- test \$`1<<9`,%r8d # check SSSE3 bit
+ test \$IA32CAP_MASK1_SSSE3,%r8d # check SSSE3 bit
jz .Lialu
___
$code.=<<___ if ($avx);
- and \$`1<<28`,%r8d # mask AVX bit
- and \$`1<<30`,%r9d # mask "Intel CPU" bit
+ and \$IA32CAP_MASK1_AVX,%r8d # mask AVX bit
+ and \$IA32CAP_MASK0_INTEL,%r9d # mask "Intel CPU" bit
or %r9d,%r8d
- cmp \$`1<<28|1<<30`,%r8d
+ cmp \$(IA32CAP_MASK0_INTEL | IA32CAP_MASK1_AVX),%r8d
je _avx_shortcut
___
$code.=<<___;
diff --git a/lib/libcrypto/sha/asm/sha512-586.pl b/lib/libcrypto/sha/asm/sha512-586.pl
index 7eab6a5b88b..163361ebe9d 100644
--- a/lib/libcrypto/sha/asm/sha512-586.pl
+++ b/lib/libcrypto/sha/asm/sha512-586.pl
@@ -284,7 +284,7 @@ sub BODY_00_15_x86 {
if ($sse2) {
&picmeup("edx","OPENSSL_ia32cap_P",$K512,&label("K512"));
- &bt (&DWP(0,"edx"),26);
+ &bt (&DWP(0,"edx"),"\$IA32CAP_BIT0_SSE2");
&jnc (&label("loop_x86"));
# load ctx->h[0-7]
diff --git a/lib/libcrypto/whrlpool/wp_block.c b/lib/libcrypto/whrlpool/wp_block.c
index d8c1b89ba38..1e00a013304 100644
--- a/lib/libcrypto/whrlpool/wp_block.c
+++ b/lib/libcrypto/whrlpool/wp_block.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: wp_block.c,v 1.12 2016/09/04 14:06:46 jsing Exp $ */
+/* $OpenBSD: wp_block.c,v 1.13 2016/11/04 17:30:30 miod Exp $ */
/**
* The Whirlpool hashing function.
*
@@ -36,10 +36,12 @@
*
*/
-#include "wp_locl.h"
#include <string.h>
+#include <openssl/crypto.h>
#include <machine/endian.h>
+#include "wp_locl.h"
+
typedef unsigned char u8;
#if defined(_LP64)
typedef unsigned long u64;
@@ -57,12 +59,15 @@ typedef unsigned long long u64;
# define OPENSSL_SMALL_FOOTPRINT /* it appears that for elder non-MMX
CPUs this is actually faster! */
# endif
-# define GO_FOR_MMX(ctx,inp,num) do { \
- extern unsigned int OPENSSL_ia32cap_P[]; \
+#include "x86_arch.h"
+# define GO_FOR_MMX(ctx,inp,num) \
+do { \
void whirlpool_block_mmx(void *,const void *,size_t); \
- if (!(OPENSSL_ia32cap_P[0] & (1<<23))) break; \
- whirlpool_block_mmx(ctx->H.c,inp,num); return; \
- } while (0)
+ if ((OPENSSL_cpu_caps() & CPUCAP_MASK_MMX) == 0) \
+ break; \
+ whirlpool_block_mmx(ctx->H.c,inp,num); \
+ return; \
+} while (0)
# endif
#elif defined(__arm__)
# define SMALL_REGISTER_BANK
diff --git a/lib/libcrypto/x86_64cpuid.pl b/lib/libcrypto/x86_64cpuid.pl
index b36d3f7dc5d..6558dedb6be 100644
--- a/lib/libcrypto/x86_64cpuid.pl
+++ b/lib/libcrypto/x86_64cpuid.pl
@@ -20,8 +20,8 @@ print<<___;
.section .init
call OPENSSL_cpuid_setup
+.extern OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P
-.comm OPENSSL_ia32cap_P,8,4
.text
@@ -80,8 +80,8 @@ OPENSSL_ia32_cpuid:
mov %eax,%r10d
mov \$0x80000001,%eax
cpuid
- or %ecx,%r9d
- and \$0x00000801,%r9d # isolate AMD XOP bit, 1<<11
+ and \$IA32CAP_MASK1_AMD_XOP,%r9d # isolate AMD XOP bit
+ or \$1,%r9d # make sure %r9d is not zero
cmp \$0x80000008,%r10d
jb .Lintel
@@ -93,12 +93,12 @@ OPENSSL_ia32_cpuid:
mov \$1,%eax
cpuid
- bt \$28,%edx # test hyper-threading bit
+ bt \$IA32CAP_BIT0_HT,%edx # test hyper-threading bit
jnc .Lgeneric
shr \$16,%ebx # number of logical processors
cmp %r10b,%bl
ja .Lgeneric
- and \$0xefffffff,%edx # ~(1<<28)
+ xor \$IA32CAP_MASK0_HT,%edx
jmp .Lgeneric
.Lintel:
@@ -116,33 +116,37 @@ OPENSSL_ia32_cpuid:
.Lnocacheinfo:
mov \$1,%eax
cpuid
- and \$0xbfefffff,%edx # force reserved bits to 0
+ # force reserved bits to 0
+ and \$(~(IA32CAP_MASK0_INTELP4 | IA32CAP_MASK0_INTEL)),%edx
cmp \$0,%r9d
jne .Lnotintel
- or \$0x40000000,%edx # set reserved bit#30 on Intel CPUs
+ # set reserved bit#30 on Intel CPUs
+ or \$IA32CAP_MASK0_INTEL,%edx
and \$15,%ah
cmp \$15,%ah # examine Family ID
jne .Lnotintel
- or \$0x00100000,%edx # set reserved bit#20 to engage RC4_CHAR
+ # set reserved bit#20 to engage RC4_CHAR
+ or \$IA32CAP_MASK0_INTELP4,%edx
.Lnotintel:
- bt \$28,%edx # test hyper-threading bit
+ bt \$IA32CAP_BIT0_HT,%edx # test hyper-threading bit
jnc .Lgeneric
- and \$0xefffffff,%edx # ~(1<<28)
+ xor \$IA32CAP_MASK0_HT,%edx
cmp \$0,%r10d
je .Lgeneric
- or \$0x10000000,%edx # 1<<28
+ or \$IA32CAP_MASK0_HT,%edx
shr \$16,%ebx
cmp \$1,%bl # see if cache is shared
ja .Lgeneric
- and \$0xefffffff,%edx # ~(1<<28)
+ xor \$IA32CAP_MASK0_HT,%edx # clear hyper-threading bit if not
+
.Lgeneric:
- and \$0x00000800,%r9d # isolate AMD XOP flag
- and \$0xfffff7ff,%ecx
+ and \$IA32CAP_MASK1_AMD_XOP,%r9d # isolate AMD XOP flag
+ and \$(~IA32CAP_MASK1_AMD_XOP),%ecx
or %ecx,%r9d # merge AMD XOP flag
mov %edx,%r10d # %r9d:%r10d is copy of %ecx:%edx
- bt \$27,%r9d # check OSXSAVE bit
+ bt \$IA32CAP_BIT1_OSXSAVE,%r9d # check OSXSAVE bit
jnc .Lclear_avx
xor %ecx,%ecx # XCR0
.byte 0x0f,0x01,0xd0 # xgetbv
@@ -150,7 +154,7 @@ OPENSSL_ia32_cpuid:
cmp \$6,%eax
je .Ldone
.Lclear_avx:
- mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11)
+ mov \$(~(IA32CAP_MASK1_AVX | IA32CAP_MASK1_FMA3 | IA32CAP_MASK1_AMD_XOP)),%eax
and %eax,%r9d # clear AVX, FMA and AMD XOP bits
.Ldone:
shl \$32,%r9
diff --git a/lib/libcrypto/x86_arch.h b/lib/libcrypto/x86_arch.h
new file mode 100644
index 00000000000..5b2cf97546f
--- /dev/null
+++ b/lib/libcrypto/x86_arch.h
@@ -0,0 +1,90 @@
+/* $OpenBSD: x86_arch.h,v 1.1 2016/11/04 17:30:30 miod Exp $ */
+/*
+ * Copyright (c) 2016 Miodrag Vallat.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * The knowledge of the layout of OPENSSL_ia32cap_P is internal to libcrypto
+ * (and, to some extent, to libssl), and may change in the future without
+ * notice.
+ */
+
+/*
+ * OPENSSL_ia32cap_P is computed at runtime by OPENSSL_ia32_cpuid().
+ *
+ * On processors which lack the cpuid instruction, the value is always
+ * zero (this only matters on 32-bit processors, of course).
+ *
+ * On processors which support the cpuid instruction, after running
+ * "cpuid 1", the value of %edx is written to the low word of OPENSSL_ia32cap_P,
+ * and the value of %ecx is written to its high word.
+ *
+ * Further processing is done to set or clear specific bits, depending
+ * upon the exact processor type.
+ *
+ * Assembly routines usually address OPENSSL_ia32cap_P as two 32-bit words,
+ * hence two sets of bit numbers and masks. OPENSSL_cpu_caps() returns the
+ * complete 64-bit word.
+ */
+
+/* bit numbers for the low word */
+#define IA32CAP_BIT0_FPU 0
+#define IA32CAP_BIT0_MMX 23
+#define IA32CAP_BIT0_FXSR 24
+#define IA32CAP_BIT0_SSE 25
+#define IA32CAP_BIT0_SSE2 26
+#define IA32CAP_BIT0_HT 28
+
+/* the following bits are not obtained from cpuid */
+#define IA32CAP_BIT0_INTELP4 20
+#define IA32CAP_BIT0_INTEL 30
+
+/* bit numbers for the high word */
+#define IA32CAP_BIT1_PCLMUL 1
+#define IA32CAP_BIT1_SSSE3 9
+#define IA32CAP_BIT1_FMA3 12
+#define IA32CAP_BIT1_AESNI 25
+#define IA32CAP_BIT1_OSXSAVE 27
+#define IA32CAP_BIT1_AVX 28
+
+#define IA32CAP_BIT1_AMD_XOP 11
+
+/* bit masks for the low word */
+#define IA32CAP_MASK0_MMX (1 << IA32CAP_BIT0_MMX)
+#define IA32CAP_MASK0_FXSR (1 << IA32CAP_BIT0_FXSR)
+#define IA32CAP_MASK0_SSE (1 << IA32CAP_BIT0_SSE)
+#define IA32CAP_MASK0_SSE2 (1 << IA32CAP_BIT0_SSE2)
+#define IA32CAP_MASK0_HT (1 << IA32CAP_BIT0_HT)
+
+#define IA32CAP_MASK0_INTELP4 (1 << IA32CAP_BIT0_INTELP4)
+#define IA32CAP_MASK0_INTEL (1 << IA32CAP_BIT0_INTEL)
+
+/* bit masks for the high word */
+#define IA32CAP_MASK1_PCLMUL (1 << IA32CAP_BIT1_PCLMUL)
+#define IA32CAP_MASK1_SSSE3 (1 << IA32CAP_BIT1_SSSE3)
+#define IA32CAP_MASK1_FMA3 (1 << IA32CAP_BIT1_FMA3)
+#define IA32CAP_MASK1_AESNI (1 << IA32CAP_BIT1_AESNI)
+#define IA32CAP_MASK1_AVX (1 << IA32CAP_BIT1_AVX)
+
+#define IA32CAP_MASK1_AMD_XOP (1 << IA32CAP_BIT1_AMD_XOP)
+
+/* bit masks for OPENSSL_cpu_caps() */
+#define CPUCAP_MASK_MMX IA32CAP_MASK0_MMX
+#define CPUCAP_MASK_FXSR IA32CAP_MASK0_FXSR
+#define CPUCAP_MASK_SSE IA32CAP_MASK0_SSE
+#define CPUCAP_MASK_INTELP4 IA32CAP_MASK0_INTELP4
+#define CPUCAP_MASK_PCLMUL (1ULL << (32 + IA32CAP_BIT1_PCLMUL))
+#define CPUCAP_MASK_SSSE3 (1ULL << (32 + IA32CAP_BIT1_SSSE3))
+#define CPUCAP_MASK_AESNI (1ULL << (32 + IA32CAP_BIT1_AESNI))
diff --git a/lib/libcrypto/x86cpuid.pl b/lib/libcrypto/x86cpuid.pl
index 7918629f643..8b9570fc726 100644
--- a/lib/libcrypto/x86cpuid.pl
+++ b/lib/libcrypto/x86cpuid.pl
@@ -56,8 +56,10 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
&mov ("esi","eax");
&mov ("eax",0x80000001);
&cpuid ();
- &or ("ebp","ecx");
- &and ("ebp",1<<11|1); # isolate XOP bit
+ &and ("ecx","\$IA32CAP_MASK1_AMD_XOP"); # isolate AMD XOP bit
+ &or ("ecx",1); # make sure ecx is not zero
+ &mov ("ebp","ecx");
+
&cmp ("esi",0x80000008);
&jb (&label("intel"));
@@ -69,13 +71,13 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
&mov ("eax",1);
&xor ("ecx","ecx");
&cpuid ();
- &bt ("edx",28);
+ &bt ("edx","\$IA32CAP_BIT0_HT");
&jnc (&label("generic"));
&shr ("ebx",16);
&and ("ebx",0xff);
&cmp ("ebx","esi");
&ja (&label("generic"));
- &and ("edx",0xefffffff); # clear hyper-threading bit
+ &xor ("edx","\$IA32CAP_MASK0_HT"); # clear hyper-threading bit
&jmp (&label("generic"));
&set_label("intel");
@@ -94,34 +96,38 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
&mov ("eax",1);
&xor ("ecx","ecx");
&cpuid ();
- &and ("edx",0xbfefffff); # force reserved bits #20, #30 to 0
+ # force reserved bits to 0.
+ &and ("edx","\$~(IA32CAP_MASK0_INTELP4 | IA32CAP_MASK0_INTEL)");
&cmp ("ebp",0);
&jne (&label("notintel"));
- &or ("edx",1<<30); # set reserved bit#30 on Intel CPUs
- &and (&HB("eax"),15); # familiy ID
+ # set reserved bit#30 on Intel CPUs
+ &or ("edx","\$IA32CAP_MASK0_INTEL");
+ &and (&HB("eax"),15); # family ID
&cmp (&HB("eax"),15); # P4?
&jne (&label("notintel"));
- &or ("edx",1<<20); # set reserved bit#20 to engage RC4_CHAR
+ # set reserved bit#20 to engage RC4_CHAR
+ &or ("edx","\$IA32CAP_MASK0_INTELP4");
&set_label("notintel");
- &bt ("edx",28); # test hyper-threading bit
+ &bt ("edx","\$IA32CAP_BIT0_HT"); # test hyper-threading bit
&jnc (&label("generic"));
- &and ("edx",0xefffffff);
+ &xor ("edx","\$IA32CAP_MASK0_HT");
&cmp ("edi",0);
&je (&label("generic"));
- &or ("edx",0x10000000);
+ &or ("edx","\$IA32CAP_MASK0_HT");
&shr ("ebx",16);
- &cmp (&LB("ebx"),1);
+ &cmp (&LB("ebx"),1); # see if cache is shared
&ja (&label("generic"));
- &and ("edx",0xefffffff); # clear hyper-threading bit if not
+ &xor ("edx","\$IA32CAP_MASK0_HT"); # clear hyper-threading bit if not
&set_label("generic");
- &and ("ebp",1<<11); # isolate AMD XOP flag
- &and ("ecx",0xfffff7ff); # force 11th bit to 0
+ &and ("ebp","\$IA32CAP_MASK1_AMD_XOP"); # isolate AMD XOP flag
+ # force reserved bits to 0.
+ &and ("ecx","\$~IA32CAP_MASK1_AMD_XOP");
&mov ("esi","edx");
&or ("ebp","ecx"); # merge AMD XOP flag
- &bt ("ecx",27); # check OSXSAVE bit
+ &bt ("ecx","\$IA32CAP_BIT1_OSXSAVE"); # check OSXSAVE bit
&jnc (&label("clear_avx"));
&xor ("ecx","ecx");
&data_byte(0x0f,0x01,0xd0); # xgetbv
@@ -131,10 +137,13 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
&cmp ("eax",2);
&je (&label("clear_avx"));
&set_label("clear_xmm");
- &and ("ebp",0xfdfffffd); # clear AESNI and PCLMULQDQ bits
- &and ("esi",0xfeffffff); # clear FXSR
+ # clear AESNI and PCLMULQDQ bits.
+ &and ("ebp","\$~(IA32CAP_MASK1_AESNI | IA32CAP_MASK1_PCLMUL)");
+ # clear FXSR.
+ &and ("esi","\$~IA32CAP_MASK0_FXSR");
&set_label("clear_avx");
- &and ("ebp",0xefffe7ff); # clear AVX, FMA and AMD XOP bits
+ # clear AVX, FMA3 and AMD XOP bits.
+ &and ("ebp","\$~(IA32CAP_MASK1_AVX | IA32CAP_MASK1_FMA3 | IA32CAP_MASK1_AMD_XOP)");
&set_label("done");
&mov ("eax","esi");
&mov ("edx","ebp");
@@ -143,16 +152,17 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
&external_label("OPENSSL_ia32cap_P");
-&function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
+&function_begin_B("OPENSSL_wipe_cpu","");
&xor ("eax","eax");
&xor ("edx","edx");
&picmeup("ecx","OPENSSL_ia32cap_P");
&mov ("ecx",&DWP(0,"ecx"));
- &bt (&DWP(0,"ecx"),0);
+ &bt (&DWP(0,"ecx"),"\$IA32CAP_BIT0_FPU");
&jnc (&label("no_x87"));
if ($sse2) {
- &and ("ecx",1<<26|1<<24); # check SSE2 and FXSR bits
- &cmp ("ecx",1<<26|1<<24);
+ # Check SSE2 and FXSR bits.
+ &and ("ecx", "\$(IA32CAP_MASK0_FXSR | IA32CAP_MASK0_SSE2)");
+ &cmp ("ecx", "\$(IA32CAP_MASK0_FXSR | IA32CAP_MASK0_SSE2)");
&jne (&label("no_sse2"));
&pxor ("xmm0","xmm0");
&pxor ("xmm1","xmm1");